2#include "SparseOctree.h"
4#include <thrust/sort.h>
9 DYN_FUNC OcKey CalculateMortonCode(Level l, OcIndex x, OcIndex y, OcIndex z)
17 for (int i = 0; i < MAX_LEVEL; i++)
19 key |= (x & 1U << i) << 2 * i | (y & 1U << i) << (2 * i + 1) | (z & 1U << i) << (2 * i + 2);
25 DYN_FUNC void RecoverFromMortonCode(OcKey key, Level& l, OcIndex& x, OcIndex& y, OcIndex& z)
30 void print(DArray<OctreeNode>& d_arr)
32 CArray<OctreeNode> h_arr;
33 h_arr.resize(d_arr.size());
37 for (uint i = 0; i < h_arr.size(); i++)
40 OcIndex tnx, tny, tnz;
41 h_arr[i].getCoord(level, tnx, tny, tnz);
42 //std::cout << "Poster order: " << i << " " << h_arr[i].key() << " " << tnx << " " << tny << " " << tnz << std::endl;
43 printf("Node %d: key: %d - %d: %d %d %d : ", i, h_arr[i].m_key, level, tnx, tny, tnz);
44 printf(" Data size: %d ; start loc: %d; data loc: %d; first child: %d ", h_arr[i].getDataSize(), h_arr[i].getStartIndex(), h_arr[i].m_data_loc, h_arr[i].m_first_child_loc);
47 for (int j = 0; j < 8; j++)
49 printf(" %d: %d ", j, h_arr[i].childs[j]);
59 DYN_FUNC OctreeNode::OctreeNode()
63 for (int i = 0; i < 8; i++)
69 DYN_FUNC OctreeNode::OctreeNode(OcKey key)
79 m_level = m_level == 0 ? 0 : m_level - 1;
81 for (int i = 0; i < 8; i++)
87 DYN_FUNC OctreeNode::OctreeNode(Level l, OcIndex x, OcIndex y, OcIndex z)
91 m_key = CalculateMortonCode(l, x, y, z);
93 for (int i = 0; i < 8; i++)
100 DYN_FUNC void OctreeNode::getCoord(Level& l, OcIndex& x, OcIndex& y, OcIndex& z)
107 for (int i = 0; i < MAX_LEVEL; i++)
109 x |= (m_key & (1U << DIM * i)) >> 2 * i;
110 y |= (m_key & (1U << DIM * i + 1)) >> (2 * i + 1);
111 z |= (m_key & (1U << DIM * i + 2)) >> (2 * i + 2);
114 x &= ((1U << l) - 1);
115 y &= ((1U << l) - 1);
116 z &= ((1U << l) - 1);
120 DYN_FUNC bool OctreeNode::operator==(const OctreeNode& mc2) const
122 return m_key == mc2.m_key;
125 DYN_FUNC bool OctreeNode::operator>=(const OctreeNode& mc2) const
127 if (mc2.isContainedStrictlyIn(*this))
133 m_level > mc2.m_level ? (k1 = k1 >> DIM * (m_level - mc2.m_level)) : (k2 = k2 >> DIM * (mc2.m_level - m_level));
138 DYN_FUNC bool OctreeNode::operator>(const OctreeNode& mc2) const
140 if (isContainedStrictlyIn(mc2))
148 m_level > mc2.m_level ? (k1 = k1 >> DIM * (m_level - mc2.m_level)) : (k2 = k2 >> DIM * (mc2.m_level - m_level));
153 DYN_FUNC bool OctreeNode::operator<=(const OctreeNode& mc2) const
155 return ~(*this > mc2);
158 DYN_FUNC bool OctreeNode::operator<(const OctreeNode& mc2) const
160 return ~(*this >= mc2);
163 DYN_FUNC bool OctreeNode::isContainedIn(const OctreeNode& mc2) const
165 if (m_level < mc2.m_level)
170 auto k1 = m_key >> 3 * (m_level - mc2.m_level);
176 DYN_FUNC bool OctreeNode::isContainedStrictlyIn(const OctreeNode& mc2) const
178 if (m_level <= mc2.m_level)
183 auto k1 = m_key >> DIM * (m_level - mc2.m_level);
190 DYN_FUNC OctreeNode OctreeNode::leastCommonAncestor(const OctreeNode& mc2) const
193 OcKey k2 = mc2.m_key;
195 m_level > mc2.m_level ? (k1 = k1 >> DIM * (m_level - mc2.m_level)) : (k2 = k2 >> DIM * (mc2.m_level - m_level));
202 return OctreeNode(k1);
205 // DYN_FUNC MortonCode3D& MortonCode3D::operator=(const MortonCode3D & mc2)
207 // m_key = mc2.m_key;
211 // DYN_FUNC MortonCode3D MortonCode3D::operator=(const MortonCode3D & mc2) const
213 // return MortonCode3D(mc2.m_key);
216 DYN_FUNC bool OctreeNode::operator==(const OcKey k) const
221 DYN_FUNC bool OctreeNode::operator!=(const OcKey k) const
227 template<typename TDataType>
228 SparseOctree<TDataType>::SparseOctree()
232 template<typename TDataType>
233 SparseOctree<TDataType>::~SparseOctree()
237 template<typename TDataType>
238 void SparseOctree<TDataType>::release()
241 m_post_ordered_nodes.clear();
245 nonRepeatNodes_cpy.clear();
247 duplicates_count.clear();
250 template<typename TDataType>
251 void SparseOctree<TDataType>::setSpace(Coord lo, Real h, Real L)
257 Real segments = m_L / h;
259 m_level_max = ceil(log2(segments));
260 //if (m_level_max < 1) m_level_max = 1;
263 template<typename Real, typename Coord>
264 __global__ void SO_ConstructAABB(
269 int tId = threadIdx.x + (blockIdx.x * blockDim.x);
271 if (tId >= pos.size()) return;
275 aabb[tId].v0 = p - radius;
276 aabb[tId].v1 = p + radius;
280 template<typename TDataType>
281 CPU_FUNC OctreeNode SparseOctree<TDataType>::queryNode(Level l, OcIndex x, OcIndex y, OcIndex z)
283 OcKey key = CalculateMortonCode(l, x, y, z);
285 OcKey mask = 7U << 3 * l;
287 CArray<OctreeNode> h_ordered_nodes(m_post_ordered_nodes.size());
288 h_ordered_nodes.assign(m_post_ordered_nodes);
290 OctreeNode node = h_ordered_nodes[h_ordered_nodes.size() - 1];
292 //root: level 0, i should be indexed from 1 to l
293 for (int i = 1; i <= l; i++)
295 OcKey mask_i = mask >> DIM * i;
296 int child_index = (key & mask_i) >> (DIM * (l - i));
298 if (node.childs[child_index] == EMPTY)
304 node = h_ordered_nodes[node.childs[child_index]];
308 h_ordered_nodes.clear();
310 if (key == node.key())
317 template<typename TDataType>
318 GPU_FUNC Level SparseOctree<TDataType>::requestLevelNumber(const AABB box)
320 return SO_ComputeLevel(box, m_h, m_level_max);
324 template<typename TDataType>
325 GPU_FUNC int SparseOctree<TDataType>::requestIntersectionNumber(const AABB box)
329 int nx_lo, ny_lo, nz_lo;
330 int nx_hi, ny_hi, nz_hi;
332 int num = SO_ComputeRange(level, nx_lo, nx_hi, ny_lo, ny_hi, nz_lo, nz_hi, box, m_lo, m_h, m_level_max);
335 //box.intersect(box);
339 for (int i = nx_lo; i <= nx_hi; i++)
340 for (int j = ny_lo; j <= ny_hi; j++)
341 for (int k = nz_lo; k <= nz_hi; k++)
343 OcKey mc = CalculateMortonCode(level, i, j, k);
345 int d_num = this->requestIntersectionNumber(mc, level);
354 template<typename TDataType>
355 GPU_FUNC void SparseOctree<TDataType>::reqeustIntersectionIds(int* ids, const AABB box)
359 int nx_lo, ny_lo, nz_lo;
360 int nx_hi, ny_hi, nz_hi;
362 int num = SO_ComputeRange(level, nx_lo, nx_hi, ny_lo, ny_hi, nz_lo, nz_hi, box, m_lo, m_h, m_level_max);
367 for (int i = nx_lo; i <= nx_hi; i++)
368 for (int j = ny_lo; j <= ny_hi; j++)
369 for (int k = nz_lo; k <= nz_hi; k++)
371 OcKey mc = CalculateMortonCode(level, i, j, k);
375 this->reqeustIntersectionIds(ids, shift, mc, level);
382 template<typename TDataType>
383 GPU_FUNC int SparseOctree<TDataType>::requestIntersectionNumberFromLevel(const AABB box, int level)
385 int nx_lo, ny_lo, nz_lo;
386 int nx_hi, ny_hi, nz_hi;
388 int num = SO_ComputeRangeAtLevel(nx_lo, nx_hi, ny_lo, ny_hi, nz_lo, nz_hi, box, level, m_lo, m_h, m_level_max);
392 //printf("num = %d\n",num);
396 for (int i = nx_lo; i <= nx_hi; i++)
397 for (int j = ny_lo; j <= ny_hi; j++)
398 for (int k = nz_lo; k <= nz_hi; k++)
400 OcKey mc = CalculateMortonCode(level, i, j, k);
402 int d_num = this->requestIntersectionNumber(mc, level);
408 //printf("====\n%d\n%d %d %d\n%d %d %d\n============\n",(int)pow(Real(2), int(level)),nx_lo, ny_lo, nz_lo, nx_hi, ny_hi, nz_hi);
412 template<typename TDataType>
413 GPU_FUNC int SparseOctree<TDataType>::requestIntersectionNumberFromLevel(const AABB box, AABB* data, int level)
415 int nx_lo, ny_lo, nz_lo;
416 int nx_hi, ny_hi, nz_hi;
418 int num = SO_ComputeRangeAtLevel(nx_lo, nx_hi, ny_lo, ny_hi, nz_lo, nz_hi, box, level, m_lo, m_h, m_level_max);
422 //printf("num = %d\n",num);
426 for (int i = nx_lo; i <= nx_hi; i++)
427 for (int j = ny_lo; j <= ny_hi; j++)
428 for (int k = nz_lo; k <= nz_hi; k++)
430 OcKey mc = CalculateMortonCode(level, i, j, k);
432 int d_num = this->requestIntersectionNumber(mc, level, box, data);
438 //printf("====\n%d\n%d %d %d\n%d %d %d\n============\n",(int)pow(Real(2), int(level)),nx_lo, ny_lo, nz_lo, nx_hi, ny_hi, nz_hi);
442 template<typename TDataType>
443 GPU_FUNC void SparseOctree<TDataType>::reqeustIntersectionIdsFromLevel(int* ids, const AABB box, int level)
445 int nx_lo, ny_lo, nz_lo;
446 int nx_hi, ny_hi, nz_hi;
448 int num = SO_ComputeRangeAtLevel(nx_lo, nx_hi, ny_lo, ny_hi, nz_lo, nz_hi, box, level, m_lo, m_h, m_level_max);
454 for (int i = nx_lo; i <= nx_hi; i++)
455 for (int j = ny_lo; j <= ny_hi; j++)
456 for (int k = nz_lo; k <= nz_hi; k++)
458 OcKey mc = CalculateMortonCode(level, i, j, k);
460 //printf("level of point: %d %d %d %d\n", level, i, j, k);
461 this->reqeustIntersectionIds(ids, shift, mc, level);
466 template<typename TDataType>
467 GPU_FUNC void SparseOctree<TDataType>::reqeustIntersectionIdsFromLevel(int* ids, const AABB box, AABB* data, int level)
469 int nx_lo, ny_lo, nz_lo;
470 int nx_hi, ny_hi, nz_hi;
472 int num = SO_ComputeRangeAtLevel(nx_lo, nx_hi, ny_lo, ny_hi, nz_lo, nz_hi, box, level, m_lo, m_h, m_level_max);
478 for (int i = nx_lo; i <= nx_hi; i++)
479 for (int j = ny_lo; j <= ny_hi; j++)
480 for (int k = nz_lo; k <= nz_hi; k++)
482 OcKey mc = CalculateMortonCode(level, i, j, k);
484 //printf("level of point: %d %d %d %d\n", level, i, j, k);
485 this->reqeustIntersectionIds(ids, shift, mc, level, box, data);
491 template<typename TDataType>
492 GPU_FUNC int SparseOctree<TDataType>::requestIntersectionNumberFromBottom(const AABB box)
494 return requestIntersectionNumberFromLevel(box, m_level_max);
497 template<typename TDataType>
498 GPU_FUNC void SparseOctree<TDataType>::reqeustIntersectionIdsFromBottom(int* ids, const AABB box)
500 reqeustIntersectionIdsFromLevel(ids, box, m_level_max);
503 template<typename TDataType>
504 GPU_FUNC int SparseOctree<TDataType>::requestIntersectionNumberFromBottom(const AABB box, AABB* data)
506 return requestIntersectionNumberFromLevel(box, data, m_level_max);
509 template<typename TDataType>
510 GPU_FUNC void SparseOctree<TDataType>::reqeustIntersectionIdsFromBottom(int* ids, const AABB box, AABB* data)
512 reqeustIntersectionIdsFromLevel(ids, box, data, m_level_max);
515 template<typename TDataType>
516 GPU_FUNC int SparseOctree<TDataType>::requestIntersectionNumber(const OcKey key, const Level l)
520 OcKey mask = 7U << DIM * l;
522 OctreeNode node = m_post_ordered_nodes[m_post_ordered_nodes.size() - 1];
524 ret_num += node.getDataSize();
527 int lp = node.level();
528 //root: level 0, i should be indexed from 1 to l
529 for (int i = lp + 1; i <= l;)
531 OcKey mask_i = mask >> DIM * i;
532 int child_index = (key & mask_i) >> (DIM * (l - i));
534 if (node.childs[child_index] == EMPTY)
540 node = m_post_ordered_nodes[node.childs[child_index]];
542 int lc = node.level();
545 auto k1 = key >> DIM * (l - lc);
546 auto k2 = node.key();
547 if (!(k1 == k2)) break;
550 ret_num += node.getDataSize();
551 //the octree may be incomplete, skip over missing internal nodes.
561 template<typename TDataType>
562 GPU_FUNC int SparseOctree<TDataType>::requestIntersectionNumber(const OcKey key, const Level l, const AABB box, AABB* data)
566 OcKey mask = 7U << DIM * l;
568 OctreeNode node = m_post_ordered_nodes[m_post_ordered_nodes.size() - 1];
570 //ret_num += node.getDataSize();
571 int nd_size = node.getDataSize();
572 int t_id = node.getStartIndex();
574 for (int t = 0; t < nd_size; t++)
576 if (box.intersect(data[m_all_nodes[t_id + t].getDataIndex()], tmp_box))
584 int lp = node.level();
585 //root: level 0, i should be indexed from 1 to l
586 for (int i = lp + 1; i <= l;)
588 OcKey mask_i = mask >> DIM * i;
589 int child_index = (key & mask_i) >> (DIM * (l - i));
591 if (node.childs[child_index] == EMPTY)
597 node = m_post_ordered_nodes[node.childs[child_index]];
599 int lc = node.level();
602 auto k1 = key >> DIM * (l - lc);
603 auto k2 = node.key();
604 if (!(k1 == k2)) break;
606 int t_id = node.getStartIndex();
607 int nd_size = node.getDataSize();
610 for (int t = 0; t < nd_size; t++)
612 if (box.intersect(data[m_all_nodes[t_id + t].getDataIndex()], tmp_box))
615 //the octree may be incomplete, skip over missing internal nodes.
626 template<typename TDataType>
627 GPU_FUNC void SparseOctree<TDataType>::reqeustIntersectionIds(int* ids, int& shift, const OcKey key, const Level l)
629 OcKey mask = 7U << DIM * l;
631 OctreeNode node = m_post_ordered_nodes[m_post_ordered_nodes.size() - 1];
633 int nd_size = node.getDataSize();
635 int t_id = node.getStartIndex();
636 for (int t = 0; t < nd_size; t++)
638 ids[shift] = m_all_nodes[t_id + t].getDataIndex();
641 //printf("level = %d, shift = %d\n", 0, shift);
642 int lp = node.level();
643 //root: level 0, i should be indexed from 1 to l
644 for (int i = lp + 1; i <= l;)
647 OcKey mask_i = mask >> DIM * i;
648 int child_index = (key & mask_i) >> (DIM * (l - i));
649 //printf("child_index = %d\n", child_index);
650 if (node.childs[child_index] == EMPTY)
656 node = m_post_ordered_nodes[node.childs[child_index]];
657 int nd_size = node.getDataSize();
659 int lc = node.level();
663 auto k1 = key >> DIM * (l - lc);
664 auto k2 = node.key();
665 if (!(k1 == k2)) break;
668 int t_id = node.getStartIndex();
669 for (int t = 0; t < nd_size; t++)
671 ids[shift] = m_all_nodes[t_id + t].getDataIndex();
675 //the octree may be incomplete, skip over missing internal nodes.
682 node.getCoord(level, x, y, z);
683 //printf("level = %d, shift = %d, level = %d, x = %d, y = %d, z = %d\n", i, shift, level, x, y, z);
688 // printf("shift = %d\n", shift);
691 template<typename TDataType>
692 GPU_FUNC void SparseOctree<TDataType>::reqeustIntersectionIds(int* ids, int& shift, const OcKey key, const Level l, const AABB box, AABB* data)
694 OcKey mask = 7U << DIM * l;
696 OctreeNode node = m_post_ordered_nodes[m_post_ordered_nodes.size() - 1];
698 int nd_size = node.getDataSize();
700 int t_id = node.getStartIndex();
701 for (int t = 0; t < nd_size; t++)
703 if (box.intersect(data[m_all_nodes[t_id + t].getDataIndex()], tmp_box))
705 ids[shift] = m_all_nodes[t_id + t].getDataIndex();
708 //ids[shift] = m_all_nodes[t_id + t].getDataIndex();
711 //printf("level = %d, shift = %d\n", 0, shift);
712 int lp = node.level();
713 //root: level 0, i should be indexed from 1 to l
714 for (int i = lp + 1; i <= l;)
717 OcKey mask_i = mask >> DIM * i;
718 int child_index = (key & mask_i) >> (DIM * (l - i));
719 //printf("child_index = %d\n", child_index);
720 if (node.childs[child_index] == EMPTY)
726 node = m_post_ordered_nodes[node.childs[child_index]];
727 int nd_size = node.getDataSize();
729 int lc = node.level();
732 auto k1 = key >> DIM * (l - lc);
733 auto k2 = node.key();
734 if (!(k1 == k2)) break;
737 int t_id = node.getStartIndex();
739 for (int t = 0; t < nd_size; t++)
741 if (box.intersect(data[m_all_nodes[t_id + t].getDataIndex()], tmp_box))
743 ids[shift] = m_all_nodes[t_id + t].getDataIndex();
748 //the octree may be incomplete, skip over missing internal nodes.
755 node.getCoord(level, x, y, z);
756 //printf("level = %d, shift = %d, level = %d, x = %d, y = %d, z = %d\n", i, shift, level, x, y, z);
761 // printf("shift = %d\n", shift);
763 template<typename TDataType>
764 void SparseOctree<TDataType>::construct(
765 const DArray<Coord>& points,
769 aabb.resize(points.size());
772 cuExecute(points.size(),
778 this->construct(aabb);
784 template<typename Real>
785 DYN_FUNC inline Level SO_ComputeLevel(
790 Real len_max = maximum(box.length(0), maximum(box.length(1), box.length(2)));
792 return level_max - clamp(int(ceil(log2(len_max / h_min))), 0, level_max);
795 template<typename Coord>
796 DYN_FUNC int SO_ComputeRange(
809 Coord lo_rel = box.v0 - origin;
810 Coord hi_rel = box.v1 - origin;
812 level = SO_ComputeLevel(box, h_min, level_max);
814 int grid_size = (int)pow(Real(2), int(level));
815 Real h = h_min * pow(Real(2), level_max - level);
817 // nx_lo = clamp(int(floor(lo_rel[0] / h)), 0, grid_size - 1);
818 // ny_lo = clamp(int(floor(lo_rel[1] / h)), 0, grid_size - 1);
819 // nz_lo = clamp(int(floor(lo_rel[2] / h)), 0, grid_size - 1);
821 // nx_hi = clamp(int(floor(hi_rel[0] / h)), 0, grid_size - 1);
822 // ny_hi = clamp(int(floor(hi_rel[1] / h)), 0, grid_size - 1);
823 // nz_hi = clamp(int(floor(hi_rel[2] / h)), 0, grid_size - 1);
825 nx_lo = (int)floor(lo_rel[0] / h);
826 ny_lo = (int)floor(lo_rel[1] / h);
827 nz_lo = (int)floor(lo_rel[2] / h);
829 nx_hi = (int)floor(hi_rel[0] / h);
830 ny_hi = (int)floor(hi_rel[1] / h);
831 nz_hi = (int)floor(hi_rel[2] / h);
836 if (nx_hi < 0 || nx_lo >= grid_size || ny_hi < 0 || ny_lo >= grid_size || nz_hi < 0 || nz_lo >= grid_size)
842 nx_lo = clamp(nx_lo, 0, grid_size - 1);
843 ny_lo = clamp(ny_lo, 0, grid_size - 1);
844 nz_lo = clamp(nz_lo, 0, grid_size - 1);
846 nx_hi = clamp(nx_hi, 0, grid_size - 1);
847 ny_hi = clamp(ny_hi, 0, grid_size - 1);
848 nz_hi = clamp(nz_hi, 0, grid_size - 1);
850 return (nz_hi - nz_lo + 1) * (ny_hi - ny_lo + 1) * (nx_hi - nx_lo + 1);
853 template<typename Coord>
854 DYN_FUNC int SO_ComputeRangeAtLevel(
867 Coord lo_rel = box.v0 - origin;
868 Coord hi_rel = box.v1 - origin;
870 int grid_size = (int)pow(Real(2), int(level));
871 Real h = h_min * pow(Real(2), level_max - level);
873 nx_lo = (int)floor(lo_rel[0] / h);
874 ny_lo = (int)floor(lo_rel[1] / h);
875 nz_lo = (int)floor(lo_rel[2] / h);
877 nx_hi = (int)floor(hi_rel[0] / h);
878 ny_hi = (int)floor(hi_rel[1] / h);
879 nz_hi = (int)floor(hi_rel[2] / h);
881 if (nx_hi < 0 || nx_lo >= grid_size || ny_hi < 0 || ny_lo >= grid_size || nz_hi < 0 || nz_lo >= grid_size)
886 nx_lo = clamp(nx_lo, 0, grid_size - 1);
887 ny_lo = clamp(ny_lo, 0, grid_size - 1);
888 nz_lo = clamp(nz_lo, 0, grid_size - 1);
890 nx_hi = clamp(nx_hi, 0, grid_size - 1);
891 ny_hi = clamp(ny_hi, 0, grid_size - 1);
892 nz_hi = clamp(nz_hi, 0, grid_size - 1);
894 return (nz_hi - nz_lo + 1) * (ny_hi - ny_lo + 1) * (nx_hi - nx_lo + 1);
898 template<typename Coord>
899 __global__ void SO_InitCounter(
906 int tId = threadIdx.x + (blockIdx.x * blockDim.x);
908 if (tId >= aabb.size()) return;
920 counter[tId] = SO_ComputeRange(level, nx_lo, nx_hi, ny_lo, ny_hi, nz_lo, nz_hi, aabb[tId], origin, h_min, level_max);
923 template<typename Coord>
924 __global__ void SO_CreateAllNodes(
925 DArray<OctreeNode> nodes,
932 int tId = threadIdx.x + (blockIdx.x * blockDim.x);
934 if (tId >= aabb.size()) return;
946 int num = SO_ComputeRange(level, nx_lo, nx_hi, ny_lo, ny_hi, nz_lo, nz_hi, aabb[tId], origin, h_min, level_max);
951 for (int i = nx_lo; i <= nx_hi; i++)
952 for (int j = ny_lo; j <= ny_hi; j++)
953 for (int k = nz_lo; k <= nz_hi; k++)
955 auto mc = OctreeNode(level, i, j, k);
956 mc.setDataIndex(tId);
958 nodes[index[tId] + acc_num] = mc;
960 //printf("%d %d %d %d %d \n", index[tId], morton[index[tId] + acc_num].childs[0], i, j, k);
967 __global__ void SO_CountNonRepeatedNodes(
969 DArray<OctreeNode> morton)
971 int tId = threadIdx.x + (blockIdx.x * blockDim.x);
973 if (tId >= counter.size()) return;
975 if ((tId == 0 || morton[tId].key() != morton[tId - 1].key()))
982 //printf("%d %d \n", tId, counter[tId]);
985 //the size of counter is 1 more than the size of post_ordered_nodes
986 __global__ void SO_RemoveDuplicativeNodes(
987 DArray<OctreeNode> non_repeated_nodes,
988 DArray<OctreeNode> post_ordered_nodes,
992 int tId = threadIdx.x + (blockIdx.x * blockDim.x);
994 if (tId >= post_ordered_nodes.size()) return;
996 if (tId == 0 || post_ordered_nodes[tId].key() != post_ordered_nodes[tId - 1].key())
998 non_repeated_nodes[counter[tId]] = post_ordered_nodes[tId];
999 non_repeated_nodes[counter[tId]].setStartIndex(tId);
1002 //printf("%d %d \n", tId, counter[tId]);
1005 __global__ void SO_CalculateDataSize(
1006 DArray<OctreeNode> non_repeated_nodes,
1007 int non_repeated_node_num,
1010 int tId = threadIdx.x + (blockIdx.x * blockDim.x);
1011 if (tId >= non_repeated_node_num) return;
1013 if (tId < non_repeated_node_num - 1)
1014 non_repeated_nodes[tId].setDataSize(non_repeated_nodes[tId + 1].getStartIndex() - non_repeated_nodes[tId].getStartIndex());
1016 non_repeated_nodes[tId].setDataSize(total_node_num - non_repeated_nodes[tId].getStartIndex());
1019 __global__ void SO_GenerateLCA(
1020 DArray<OctreeNode> nodes,
1024 int tId = threadIdx.x + (blockIdx.x * blockDim.x);
1025 if (tId >= shift - 1) return;
1027 nodes[tId + shift] = nodes[tId].leastCommonAncestor(nodes[tId + 1]);
1030 __global__ void SO_RemoveDuplicativeInternalNodes(
1031 DArray<OctreeNode> nodes,
1032 DArray<int> counter,
1033 DArray<OctreeNode> nodes_cpy)
1035 int tId = threadIdx.x + (blockIdx.x * blockDim.x);
1037 if (tId >= nodes.size()) return;
1039 if (tId > 0 && nodes[tId].key() == nodes[tId - 1].key())
1042 nodes[tId] = OctreeNode();
1047 nodes[tId] = nodes_cpy[tId];
1051 __global__ void SO_GenerateAuxNodes(
1052 DArray<OctreeNode> aux_nodes,
1053 DArray<OctreeNode> post_ordered_nodes,
1056 int tId = threadIdx.x + (blockIdx.x * blockDim.x);
1058 if (tId >= post_ordered_nodes.size()) return;
1060 aux_nodes[tId] = post_ordered_nodes[tId];
1061 aux_nodes[tId].m_current_loc = tId;
1062 if (tId < shift - 1)
1064 auto cp = post_ordered_nodes[tId].leastCommonAncestor(post_ordered_nodes[tId + 1]);
1065 cp.setFirstChildIndex(tId);
1068 aux_nodes[tId + shift] = cp;
1072 __global__ void SO_GeneratePostOrderedNodes(
1073 DArray<OctreeNode> post_ordered_nodes,
1074 DArray<OctreeNode> nodes_buffer,
1077 int tId = threadIdx.x + (blockIdx.x * blockDim.x);
1079 if (tId >= num) return;
1081 post_ordered_nodes[tId] = nodes_buffer[tId];
1084 __global__ void SO_SetupParentChildRelationship(
1085 DArray<OctreeNode> post_ordered_nodes,
1086 DArray<OctreeNode> aux_nodes)
1088 int tId = threadIdx.x + (blockIdx.x * blockDim.x);
1090 if (tId >= aux_nodes.size() - 1)
1093 int aux_num = aux_nodes.size();
1096 if ((aux_nodes[tId].m_bCopy != aux_nodes[t].m_bCopy))
1098 int aux_level = aux_nodes[tId].m_level;
1099 int aux_key = aux_nodes[tId].key();
1100 int aux_cur_loc = aux_nodes[tId].m_current_loc;
1101 while (t < aux_num && (aux_nodes[tId].key() == aux_nodes[t].key()))
1103 int tLoc = aux_nodes[t].getFirstChildIndex();
1105 OctreeNode child = post_ordered_nodes[tLoc];
1107 auto cKey = child.key();
1109 int cIndex = (cKey >> 3 * (child.m_level - aux_level - 1)) & 7U;
1111 post_ordered_nodes[aux_cur_loc].childs[cIndex] = tLoc;
1118 template<typename TDataType>
1119 void SparseOctree<TDataType>::construct(const DArray<AABB>& aabb)
1122 data_count.resize(aabb.size());
1125 /*************** step 1: identify nodes that containing data ****************/
1126 //step 1.1: calculate the cell number each point covers
1127 cuExecute(data_count.size(),
1135 //step 1.2: allocate enough space to store all cells
1136 int total_node_num = thrust::reduce(thrust::device, data_count.begin(), data_count.begin() + data_count.size(), (int)0, thrust::plus<int>());
1137 thrust::exclusive_scan(thrust::device, data_count.begin(), data_count.begin() + data_count.size(), data_count.begin());
1139 m_all_nodes.resize(total_node_num);
1142 //std::cout << MAX_LEVEL << ' ' << m_level_max << std::endl;
1144 //step 1.3: create all octree nodes, record the data location using node.setDataIndex().
1145 cuExecute(aabb.size(),
1154 //print(m_all_nodes);
1155 //std::cout << total_node_num << std::endl;
1156 thrust::sort(thrust::device, m_all_nodes.begin(), m_all_nodes.begin() + m_all_nodes.size(), NodeCmp());
1158 //print(m_all_nodes);
1160 construct(m_all_nodes);
1162 //data_count.clear();
1163 //thrust::sort_by_key(thrust::device, m_key.getDataPtr(), m_key.getDataPtr() + m_key.size(), m_morton.getDataPtr());
1166 template<typename TDataType>
1167 void SparseOctree<TDataType>::construct(const DArray<OctreeNode>& nodes)
1169 /*************** step 2: remove duplicative nodes ****************/
1170 int total_node_num = nodes.size();
1172 duplicates_count.resize(total_node_num);
1174 cuExecute(duplicates_count.size(),
1175 SO_CountNonRepeatedNodes,
1179 int non_duplicative_num = thrust::reduce(thrust::device, duplicates_count.begin(), duplicates_count.begin() + duplicates_count.size(), (int)0, thrust::plus<int>());
1180 thrust::exclusive_scan(thrust::device, duplicates_count.begin(), duplicates_count.begin() + duplicates_count.size(), duplicates_count.begin());
1182 node_buffer.resize(2 * non_duplicative_num - 1);
1184 //Remove duplicative nodes, record the first location using node.setStartIndex();
1185 cuExecute(nodes.size(),
1186 SO_RemoveDuplicativeNodes,
1191 //print(node_buffer);
1193 cuExecute(non_duplicative_num,
1194 SO_CalculateDataSize,
1196 non_duplicative_num,
1199 //print(node_buffer);
1202 //thrust::sort(thrust::device, nonRepeatMorton.getDataPtr(), nonRepeatMorton.getDataPtr() + non_repeated_num, NodeCmp());
1204 /*************** step 3: step up parent-child relationship * ****************/
1206 //step 3.1: Generate internal nodes & sort
1207 cuExecute(non_duplicative_num - 1,
1210 non_duplicative_num);
1212 thrust::sort(thrust::device, node_buffer.begin(), node_buffer.begin() + node_buffer.size(), NodeCmp());
1214 //print(node_buffer);
1215 nonRepeatNodes_cpy.resize(node_buffer.size());
1216 nonRepeatNodes_cpy.assign(node_buffer);
1219 node_count.resize(node_buffer.size());
1221 //step 3.2: remove duplicates
1222 cuExecute(node_buffer.size(),
1223 SO_RemoveDuplicativeInternalNodes,
1226 nonRepeatNodes_cpy);
1228 thrust::sort(thrust::device, node_buffer.begin(), node_buffer.begin() + node_buffer.size(), NodeCmp());
1229 non_duplicative_num = thrust::reduce(thrust::device, node_count.begin(), node_count.begin() + node_count.size(), (int)0, thrust::plus<int>());
1232 m_post_ordered_nodes.resize(non_duplicative_num);
1234 cuExecute(m_post_ordered_nodes.size(),
1235 SO_GeneratePostOrderedNodes,
1236 m_post_ordered_nodes,
1238 non_duplicative_num);
1240 //print(m_post_ordered_nodes);
1241 aux_nodes.resize(2 * m_post_ordered_nodes.size() - 1);
1243 cuExecute(m_post_ordered_nodes.size(),
1244 SO_GenerateAuxNodes,
1246 m_post_ordered_nodes,
1247 m_post_ordered_nodes.size());
1251 thrust::sort(thrust::device, aux_nodes.begin(), aux_nodes.begin() + aux_nodes.size(), NodeCmp());
1255 cuExecute(aux_nodes.size(),
1256 SO_SetupParentChildRelationship,
1257 m_post_ordered_nodes,
1260 //print(m_post_ordered_nodes);
1263 template<typename TDataType>
1264 void SparseOctree<TDataType>::printPostOrderedTree()
1266 print(m_post_ordered_nodes);
1270 template<typename TDataType>
1271 void SparseOctree<TDataType>::printAllNodes()
1276 DEFINE_CLASS(SparseOctree);