39 uint32_t globalSize = AddZeroSize;
41 uint32_t workgroup_size_x;
43 if (AddZeroSize < 128) {
44 workgroup_size_x = AddZeroSize / 2;
47 workgroup_size_x = 64;
53 AddZeroArray.
resize(AddZeroSize);
55 const uint32_t workgroup_count = globalSize / (workgroup_size_x * 2);
57 uint32_t h = workgroup_size_x * 2;
60 groupSize.
x = workgroup_count;
69 param.
h = data.
size();
72 mSortKernel->flush(groupSize, &data, &uniformParam, &AddZeroArray);
78 mSortKernel->flush(groupSize, &AddZeroArray, &uniformParam, &data);
83 for (; h <= globalSize; h *= 2) {
91 mSortKernel->flush(groupSize, &AddZeroArray, &uniformParam, &data);
93 for (uint32_t hh = h / 2; hh > 1; hh /= 2) {
94 if (hh <= workgroup_size_x * 2) {
99 mSortKernel->flush(groupSize, &AddZeroArray, &uniformParam, &data);
107 mSortKernel->flush(groupSize, &AddZeroArray, &uniformParam, &data);
116 mSortKernel->flush(groupSize, &data, &uniformParam, &AddZeroArray);
121 assert(keys.size() == values.size());
123 uint32_t dSize = keys.size();
126 kArray.
resize(keys.size());
130 vArray.
resize(values.size());
136 std::vector<T> ouputKData(keys.size());
139 std::vector<T> ouputVData(values.size());
142 keys.swap(ouputKData);
143 values.swap(ouputVData);
154 uint32_t workgroup_size_x;
155 if (AddZeroSize < 128) {
156 workgroup_size_x = AddZeroSize / 2;
159 workgroup_size_x = 64;
162 uint32_t globalSize = AddZeroSize;
167 KeysAddZeroArray.
resize(AddZeroSize);
169 ValuesAddZeroArray.
resize(AddZeroSize);
171 const uint32_t workgroup_count = globalSize / (workgroup_size_x * 2);
173 uint32_t h = workgroup_size_x * 2;
176 groupSize.
x = workgroup_count;
184 param.
h = keys.
size();
190 mSortByKeyKernel->flush(groupSize, &keys, &values, &uniformParam, &KeysAddZeroArray, &ValuesAddZeroArray);
198 mSortByKeyKernel->flush(groupSize, &KeysAddZeroArray, &ValuesAddZeroArray, &uniformParam, &keys, &values);
205 for (; h <= globalSize; h *= 2) {
212 mSortByKeyKernel->flush(groupSize, &KeysAddZeroArray, &ValuesAddZeroArray, &uniformParam, &keys, &values);
214 for (uint32_t hh = h / 2; hh > 1; hh /= 2) {
215 if (hh <= workgroup_size_x * 2) {
220 mSortByKeyKernel->flush(groupSize, &KeysAddZeroArray, &ValuesAddZeroArray, &uniformParam, &keys, &values);
228 mSortByKeyKernel->flush(groupSize, &KeysAddZeroArray, &ValuesAddZeroArray, &uniformParam, &keys, &values);
239 mSortByKeyKernel->flush(groupSize, &keys, &values, &uniformParam, &KeysAddZeroArray, &ValuesAddZeroArray);
241 KeysAddZeroArray.
clear();
242 ValuesAddZeroArray.
clear();