1#include <cuda_runtime.h>
2#include <vector_types.h>
4#include "Function2Pt.h"
/**
 * Element-wise binary kernel: out[i] = func(a1[i], a2[i]) for every i in [0, num).
 *
 * Only the x components of threadIdx / blockIdx / blockDim are used, and each
 * thread handles at most one element, so the launch must provide at least
 * `num` threads along x. Threads whose index falls past `num` exit without
 * touching memory.
 *
 * @param out  destination array, at least `num` elements
 * @param a1   first operand array, at least `num` elements
 * @param a2   second operand array, at least `num` elements
 * @param num  number of elements to process
 * @param func callable invoked on the device as func(a1[i], a2[i])
 */
template <typename T, typename Function>
__global__ void KerTwoPointFunc(T *out, T* a1, T* a2, size_t num, Function func)
{
	// Widen before multiplying: the built-in indices are 32-bit unsigned, so the
	// product must be formed in size_t (the type of `num`) to avoid wrap-around
	// and a signed/unsigned comparison for very large arrays.
	const size_t pId = static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x;
	if (pId >= num) return;

	out[pId] = func(a1[pId], a2[pId]);
}
/**
 * In-place element-wise binary kernel: out[i] = func(out[i], a2[i]) for every
 * i in [0, num). The current value of out[i] is passed as the FIRST argument.
 *
 * Only the x components of threadIdx / blockIdx / blockDim are used, and each
 * thread handles at most one element, so the launch must provide at least
 * `num` threads along x. Threads whose index falls past `num` exit without
 * touching memory.
 *
 * @param out  array that is both the first operand and the destination
 * @param a2   second operand array, at least `num` elements
 * @param num  number of elements to process
 * @param func callable invoked on the device as func(out[i], a2[i])
 */
template <typename T, typename Function>
__global__ void KerTwoPointFunc(T *out, T* a2, size_t num, Function func)
{
	// Widen before multiplying: the built-in indices are 32-bit unsigned, so the
	// product must be formed in size_t (the type of `num`) to avoid wrap-around
	// and a signed/unsigned comparison for very large arrays.
	const size_t pId = static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x;
	if (pId >= num) return;

	out[pId] = func(out[pId], a2[pId]);
}
/**
 * Scaled-vector-plus-vector kernel: zArr[i] = alpha * xArr[i] + yArr[i] for
 * every i in [0, num).
 *
 * Only the x components of threadIdx / blockIdx / blockDim are used, and each
 * thread handles at most one element, so the launch must provide at least
 * `num` threads along x. Threads whose index falls past `num` exit without
 * touching memory.
 *
 * @param zArr  destination array, at least `num` elements
 * @param xArr  array that is scaled by `alpha`
 * @param yArr  array that is added after scaling
 * @param alpha scalar multiplier applied to xArr
 * @param num   number of elements to process
 */
template <typename T>
__global__ void KerSaxpy(T *zArr, T* xArr, T* yArr, T alpha, size_t num)
{
	// Widen before multiplying: the built-in indices are 32-bit unsigned, so the
	// product must be formed in size_t (the type of `num`) to avoid wrap-around
	// and a signed/unsigned comparison for very large arrays.
	const size_t pId = static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x;
	if (pId >= num) return;

	zArr[pId] = alpha * xArr[pId] + yArr[pId];
}
/**
 * Launches the three-array KerTwoPointFunc kernel with PlusFunc<T>, writing
 * zArr[i] = PlusFunc<T>()(xArr[i], yArr[i]) for every element.
 * (PlusFunc is defined outside this view; presumably it adds its arguments.)
 *
 * All three arrays must have the same size (checked with assert). The kernel
 * launch is not followed by any synchronization or error query here.
 *
 * @param zArr destination array
 * @param xArr first operand
 * @param yArr second operand
 */
template <typename T>
void plus(DArray<T>& zArr, DArray<T>& xArr, DArray<T>& yArr)
{
	assert(zArr.size() == xArr.size() && zArr.size() == yArr.size());

	// Nothing to compute for empty arrays; returning early also avoids a
	// launch with a possibly zero-sized grid, which CUDA rejects.
	if (zArr.size() == 0) return;

	unsigned pDim = cudaGridSize(zArr.size(), BLOCK_SIZE);
	KerTwoPointFunc<<<pDim, BLOCK_SIZE>>>(zArr.begin(), xArr.begin(), yArr.begin(), zArr.size(), PlusFunc<T>());
}
/**
 * Launches the three-array KerTwoPointFunc kernel with MinusFunc<T>, writing
 * zArr[i] = MinusFunc<T>()(xArr[i], yArr[i]) for every element.
 * (MinusFunc is defined outside this view; presumably it computes x - y.)
 *
 * All three arrays must have the same size (checked with assert). The kernel
 * launch is not followed by any synchronization or error query here.
 *
 * @param zArr destination array
 * @param xArr first operand, passed as the functor's first argument
 * @param yArr second operand, passed as the functor's second argument
 */
template <typename T>
void subtract(DArray<T>& zArr, DArray<T>& xArr, DArray<T>& yArr)
{
	assert(zArr.size() == xArr.size() && zArr.size() == yArr.size());

	// Nothing to compute for empty arrays; returning early also avoids a
	// launch with a possibly zero-sized grid, which CUDA rejects.
	if (zArr.size() == 0) return;

	unsigned pDim = cudaGridSize(zArr.size(), BLOCK_SIZE);
	KerTwoPointFunc<<<pDim, BLOCK_SIZE>>>(zArr.begin(), xArr.begin(), yArr.begin(), zArr.size(), MinusFunc<T>());
}
/**
 * Launches the three-array KerTwoPointFunc kernel with MultiplyFunc<T>, writing
 * zArr[i] = MultiplyFunc<T>()(xArr[i], yArr[i]) for every element.
 * (MultiplyFunc is defined outside this view; presumably it multiplies its
 * arguments.)
 *
 * All three arrays must have the same size (checked with assert). The kernel
 * launch is not followed by any synchronization or error query here.
 *
 * @param zArr destination array
 * @param xArr first operand
 * @param yArr second operand
 */
template <typename T>
void multiply(DArray<T>& zArr, DArray<T>& xArr, DArray<T>& yArr)
{
	assert(zArr.size() == xArr.size() && zArr.size() == yArr.size());

	// Nothing to compute for empty arrays; returning early also avoids a
	// launch with a possibly zero-sized grid, which CUDA rejects.
	if (zArr.size() == 0) return;

	unsigned pDim = cudaGridSize(zArr.size(), BLOCK_SIZE);
	KerTwoPointFunc<<<pDim, BLOCK_SIZE>>>(zArr.begin(), xArr.begin(), yArr.begin(), zArr.size(), MultiplyFunc<T>());
}
/**
 * Launches the three-array KerTwoPointFunc kernel with DivideFunc<T>, writing
 * zArr[i] = DivideFunc<T>()(xArr[i], yArr[i]) for every element.
 * (DivideFunc is defined outside this view; presumably it computes x / y.
 * No check for zero divisors is performed in this function.)
 *
 * All three arrays must have the same size (checked with assert). The kernel
 * launch is not followed by any synchronization or error query here.
 *
 * @param zArr destination array
 * @param xArr first operand, passed as the functor's first argument
 * @param yArr second operand, passed as the functor's second argument
 */
template <typename T>
void divide(DArray<T>& zArr, DArray<T>& xArr, DArray<T>& yArr)
{
	assert(zArr.size() == xArr.size() && zArr.size() == yArr.size());

	// Nothing to compute for empty arrays; returning early also avoids a
	// launch with a possibly zero-sized grid, which CUDA rejects.
	if (zArr.size() == 0) return;

	unsigned pDim = cudaGridSize(zArr.size(), BLOCK_SIZE);
	KerTwoPointFunc<<<pDim, BLOCK_SIZE>>>(zArr.begin(), xArr.begin(), yArr.begin(), zArr.size(), DivideFunc<T>());
}
/**
 * Launches KerSaxpy to compute zArr[i] = alpha * xArr[i] + yArr[i] for every
 * element.
 *
 * All three arrays must have the same size (checked with assert). The kernel
 * launch is not followed by any synchronization or error query here.
 *
 * @param zArr  destination array
 * @param xArr  array scaled by `alpha`
 * @param yArr  array added after scaling
 * @param alpha scalar multiplier applied to xArr
 */
template <typename T>
void saxpy(DArray<T>& zArr, DArray<T>& xArr, DArray<T>& yArr, T alpha)
{
	assert(zArr.size() == xArr.size() && zArr.size() == yArr.size());

	// Nothing to compute for empty arrays; returning early also avoids a
	// launch with a possibly zero-sized grid, which CUDA rejects.
	if (zArr.size() == 0) return;

	unsigned pDim = cudaGridSize(zArr.size(), BLOCK_SIZE);
	KerSaxpy<<<pDim, BLOCK_SIZE>>>(zArr.begin(), xArr.begin(), yArr.begin(), alpha, zArr.size());
}
// Explicit instantiations: the host wrappers above are templates defined in
// this translation unit, so every element type callers may use is emitted here.

// Element-wise arithmetic: int, float and double.
template void plus<int>(DArray<int>&, DArray<int>&, DArray<int>&);
template void plus<float>(DArray<float>&, DArray<float>&, DArray<float>&);
template void plus<double>(DArray<double>&, DArray<double>&, DArray<double>&);

template void subtract<int>(DArray<int>&, DArray<int>&, DArray<int>&);
template void subtract<float>(DArray<float>&, DArray<float>&, DArray<float>&);
template void subtract<double>(DArray<double>&, DArray<double>&, DArray<double>&);

template void multiply<int>(DArray<int>&, DArray<int>&, DArray<int>&);
template void multiply<float>(DArray<float>&, DArray<float>&, DArray<float>&);
template void multiply<double>(DArray<double>&, DArray<double>&, DArray<double>&);

template void divide<int>(DArray<int>&, DArray<int>&, DArray<int>&);
template void divide<float>(DArray<float>&, DArray<float>&, DArray<float>&);
template void divide<double>(DArray<double>&, DArray<double>&, DArray<double>&);

// saxpy is instantiated for floating-point element types only (no int version).
template void saxpy<float>(DArray<float>&, DArray<float>&, DArray<float>&, float);
template void saxpy<double>(DArray<double>&, DArray<double>&, DArray<double>&, double);