39        uint32_t globalSize = AddZeroSize;
 
   41        uint32_t workgroup_size_x;
 
   43        if (AddZeroSize < 128) {
 
   44            workgroup_size_x = AddZeroSize / 2;
 
   47            workgroup_size_x = 64;
 
   53        AddZeroArray.
resize(AddZeroSize);
 
   55        const uint32_t workgroup_count = globalSize / (workgroup_size_x * 2);
 
   57        uint32_t h = workgroup_size_x * 2;
 
   60        groupSize.
x = workgroup_count;
 
   69        param.
h = data.
size();
 
   72        mSortKernel->flush(groupSize, &data, &uniformParam, &AddZeroArray);
 
   78        mSortKernel->flush(groupSize, &AddZeroArray, &uniformParam, &data);
 
   83        for (; h <= globalSize; h *= 2) {
 
   91            mSortKernel->flush(groupSize, &AddZeroArray, &uniformParam, &data);
 
   93            for (uint32_t hh = h / 2; hh > 1; hh /= 2) {
 
   94                if (hh <= workgroup_size_x * 2) {
 
   99                    mSortKernel->flush(groupSize, &AddZeroArray, &uniformParam, &data);
 
  107                    mSortKernel->flush(groupSize, &AddZeroArray, &uniformParam, &data);
 
  116        mSortKernel->flush(groupSize, &data, &uniformParam, &AddZeroArray);
 
 
  121        assert(keys.size() == values.size());
 
  123        uint32_t dSize = keys.size();
 
  126        kArray.
resize(keys.size());
 
  130        vArray.
resize(values.size());
 
  136        std::vector<T> ouputKData(keys.size());
 
  139        std::vector<T> ouputVData(values.size());
 
  142        keys.swap(ouputKData);
 
  143        values.swap(ouputVData);
 
 
  154        uint32_t  workgroup_size_x;
 
  155        if (AddZeroSize < 128) {
 
  156            workgroup_size_x = AddZeroSize / 2;
 
  159            workgroup_size_x = 64;
 
  162        uint32_t globalSize = AddZeroSize;
 
  167        KeysAddZeroArray.
resize(AddZeroSize);
 
  169        ValuesAddZeroArray.
resize(AddZeroSize);
 
  171        const uint32_t workgroup_count = globalSize / (workgroup_size_x * 2);
 
  173        uint32_t h = workgroup_size_x * 2;
 
  176        groupSize.
x = workgroup_count;
 
  184        param.
h = keys.
size();
 
  190        mSortByKeyKernel->flush(groupSize, &keys, &values, &uniformParam, &KeysAddZeroArray, &ValuesAddZeroArray);
 
  198        mSortByKeyKernel->flush(groupSize, &KeysAddZeroArray, &ValuesAddZeroArray, &uniformParam, &keys, &values);
 
  205        for (; h <= globalSize; h *= 2) {
 
  212            mSortByKeyKernel->flush(groupSize, &KeysAddZeroArray, &ValuesAddZeroArray, &uniformParam, &keys, &values);
 
  214            for (uint32_t hh = h / 2; hh > 1; hh /= 2) {
 
  215                if (hh <= workgroup_size_x * 2) {
 
  220                    mSortByKeyKernel->flush(groupSize, &KeysAddZeroArray, &ValuesAddZeroArray, &uniformParam, &keys, &values);
 
  228                    mSortByKeyKernel->flush(groupSize, &KeysAddZeroArray, &ValuesAddZeroArray, &uniformParam, &keys, &values);
 
  239        mSortByKeyKernel->flush(groupSize, &keys, &values, &uniformParam, &KeysAddZeroArray, &ValuesAddZeroArray);
 
  241        KeysAddZeroArray.
clear();
 
  242        ValuesAddZeroArray.
clear();