PeriDyno 1.0.0
Loading...
Searching...
No Matches
Function2Pt.cu
Go to the documentation of this file.
1#include <cuda_runtime.h>
2#include <vector_types.h>
3#include "Functional.h"
4#include "Function2Pt.h"
5
6namespace dyno
7{
8 namespace Function2Pt
9 {
/**
 * Element-wise binary kernel: out[i] = func(a1[i], a2[i]) for every i in [0, num).
 *
 * The kernel walks the data with a stride of (gridDim.x * blockDim.x) using
 * 64-bit indices, so it produces a complete result for any 1D launch
 * configuration (even one with fewer threads than elements) and for element
 * counts that do not fit in a 32-bit int.
 *
 * @param out   destination array, at least num elements
 * @param a1    first operand array, at least num elements
 * @param a2    second operand array, at least num elements
 * @param num   number of elements to process
 * @param func  functor invoked on the device as func(a1[i], a2[i])
 *
 * Every index is read and written by exactly one thread, so out may be the
 * very same pointer as a1 or a2; partially overlapping ranges are not safe.
 */
template <typename T, typename Function>
__global__ void KerTwoPointFunc(T *out, T* a1, T* a2, size_t num, Function func)
{
    // Widen to size_t before multiplying so neither product can wrap in 32 bits.
    const size_t stride = static_cast<size_t>(gridDim.x) * blockDim.x;
    const size_t first = static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x;

    for (size_t pId = first; pId < num; pId += stride)
    {
        out[pId] = func(a1[pId], a2[pId]);
    }
}
18
/**
 * In-place element-wise binary kernel: out[i] = func(out[i], a2[i]) for every
 * i in [0, num).
 *
 * The kernel walks the data with a stride of (gridDim.x * blockDim.x) using
 * 64-bit indices, so it produces a complete result for any 1D launch
 * configuration (even one with fewer threads than elements) and for element
 * counts that do not fit in a 32-bit int.
 *
 * @param out   array that supplies the first operand and receives the result,
 *              at least num elements
 * @param a2    second operand array, at least num elements
 * @param num   number of elements to process
 * @param func  functor invoked on the device as func(out[i], a2[i])
 *
 * Every index is touched by exactly one thread, so each element is updated
 * exactly once per launch.
 */
template <typename T, typename Function>
__global__ void KerTwoPointFunc(T *out, T* a2, size_t num, Function func)
{
    // Widen to size_t before multiplying so neither product can wrap in 32 bits.
    const size_t stride = static_cast<size_t>(gridDim.x) * blockDim.x;
    const size_t first = static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x;

    for (size_t pId = first; pId < num; pId += stride)
    {
        out[pId] = func(out[pId], a2[pId]);
    }
}
27
/**
 * Scaled vector addition kernel: zArr[i] = alpha * xArr[i] + yArr[i] for every
 * i in [0, num).
 *
 * The kernel walks the data with a stride of (gridDim.x * blockDim.x) using
 * 64-bit indices, so it produces a complete result for any 1D launch
 * configuration (even one with fewer threads than elements) and for element
 * counts that do not fit in a 32-bit int.
 *
 * @param zArr   destination array, at least num elements
 * @param xArr   array that is scaled by alpha, at least num elements
 * @param yArr   array that is added to the scaled values, at least num elements
 * @param alpha  scale factor applied to xArr
 * @param num    number of elements to process
 *
 * Every index is read and written by exactly one thread, so zArr may be the
 * very same pointer as xArr or yArr; partially overlapping ranges are not safe.
 */
template <typename T>
__global__ void KerSaxpy(T *zArr, T* xArr, T* yArr, T alpha, size_t num)
{
    // Widen to size_t before multiplying so neither product can wrap in 32 bits.
    const size_t stride = static_cast<size_t>(gridDim.x) * blockDim.x;
    const size_t first = static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x;

    for (size_t pId = first; pId < num; pId += stride)
    {
        zArr[pId] = alpha * xArr[pId] + yArr[pId];
    }
}
36
37
/**
 * Launches KerTwoPointFunc with PlusFunc<T> so that
 * zArr[i] = PlusFunc<T>()(xArr[i], yArr[i]) for every element.
 * PlusFunc is declared outside this file; presumably it adds its operands.
 *
 * @param zArr  destination array
 * @param xArr  first operand array
 * @param yArr  second operand array
 *
 * All three arrays must have the same size (checked with assert). The kernel
 * is launched without an explicit stream and no synchronization is performed
 * here. Empty arrays are a no-op.
 */
template <typename T>
void plus(DArray<T>& zArr, DArray<T>& xArr, DArray<T>& yArr)
{
    assert(zArr.size() == xArr.size() && zArr.size() == yArr.size());

    // Nothing to compute; also keeps us from ever requesting an empty grid.
    if (zArr.size() == 0)
        return;

    unsigned pDim = cudaGridSize(zArr.size(), BLOCK_SIZE);
    KerTwoPointFunc <<<pDim, BLOCK_SIZE>>> (zArr.begin(), xArr.begin(), yArr.begin(), zArr.size(), PlusFunc<T>());

    // A kernel launch does not return an error code. Surface a pending error
    // when asserts are enabled; peeking leaves the error state untouched for
    // any check the caller performs afterwards.
    assert(cudaPeekAtLastError() == cudaSuccess);
}
46
/**
 * Launches KerTwoPointFunc with MinusFunc<T> so that
 * zArr[i] = MinusFunc<T>()(xArr[i], yArr[i]) for every element.
 * MinusFunc is declared outside this file; presumably it subtracts the second
 * operand from the first.
 *
 * @param zArr  destination array
 * @param xArr  first operand array
 * @param yArr  second operand array
 *
 * All three arrays must have the same size (checked with assert). The kernel
 * is launched without an explicit stream and no synchronization is performed
 * here. Empty arrays are a no-op.
 */
template <typename T>
void subtract(DArray<T>& zArr, DArray<T>& xArr, DArray<T>& yArr)
{
    assert(zArr.size() == xArr.size() && zArr.size() == yArr.size());

    // Nothing to compute; also keeps us from ever requesting an empty grid.
    if (zArr.size() == 0)
        return;

    unsigned pDim = cudaGridSize(zArr.size(), BLOCK_SIZE);
    KerTwoPointFunc <<<pDim, BLOCK_SIZE>>> (zArr.begin(), xArr.begin(), yArr.begin(), zArr.size(), MinusFunc<T>());

    // A kernel launch does not return an error code. Surface a pending error
    // when asserts are enabled; peeking leaves the error state untouched for
    // any check the caller performs afterwards.
    assert(cudaPeekAtLastError() == cudaSuccess);
}
54
55
/**
 * Launches KerTwoPointFunc with MultiplyFunc<T> so that
 * zArr[i] = MultiplyFunc<T>()(xArr[i], yArr[i]) for every element.
 * MultiplyFunc is declared outside this file; presumably it multiplies its
 * operands.
 *
 * @param zArr  destination array
 * @param xArr  first operand array
 * @param yArr  second operand array
 *
 * All three arrays must have the same size (checked with assert). The kernel
 * is launched without an explicit stream and no synchronization is performed
 * here. Empty arrays are a no-op.
 */
template <typename T>
void multiply(DArray<T>& zArr, DArray<T>& xArr, DArray<T>& yArr)
{
    assert(zArr.size() == xArr.size() && zArr.size() == yArr.size());

    // Nothing to compute; also keeps us from ever requesting an empty grid.
    if (zArr.size() == 0)
        return;

    unsigned pDim = cudaGridSize(zArr.size(), BLOCK_SIZE);
    KerTwoPointFunc <<<pDim, BLOCK_SIZE>>> (zArr.begin(), xArr.begin(), yArr.begin(), zArr.size(), MultiplyFunc<T>());

    // A kernel launch does not return an error code. Surface a pending error
    // when asserts are enabled; peeking leaves the error state untouched for
    // any check the caller performs afterwards.
    assert(cudaPeekAtLastError() == cudaSuccess);
}
64
/**
 * Launches KerTwoPointFunc with DivideFunc<T> so that
 * zArr[i] = DivideFunc<T>()(xArr[i], yArr[i]) for every element.
 * DivideFunc is declared outside this file; presumably it divides the first
 * operand by the second. NOTE(review): how a zero divisor is treated depends
 * on DivideFunc and is not visible here.
 *
 * @param zArr  destination array
 * @param xArr  first operand array
 * @param yArr  second operand array
 *
 * All three arrays must have the same size (checked with assert). The kernel
 * is launched without an explicit stream and no synchronization is performed
 * here. Empty arrays are a no-op.
 */
template <typename T>
void divide(DArray<T>& zArr, DArray<T>& xArr, DArray<T>& yArr)
{
    assert(zArr.size() == xArr.size() && zArr.size() == yArr.size());

    // Nothing to compute; also keeps us from ever requesting an empty grid.
    if (zArr.size() == 0)
        return;

    unsigned pDim = cudaGridSize(zArr.size(), BLOCK_SIZE);
    KerTwoPointFunc <<<pDim, BLOCK_SIZE>>> (zArr.begin(), xArr.begin(), yArr.begin(), zArr.size(), DivideFunc<T>());

    // A kernel launch does not return an error code. Surface a pending error
    // when asserts are enabled; peeking leaves the error state untouched for
    // any check the caller performs afterwards.
    assert(cudaPeekAtLastError() == cudaSuccess);
}
73
74
/**
 * Launches KerSaxpy so that zArr[i] = alpha * xArr[i] + yArr[i] for every
 * element.
 *
 * @param zArr   destination array
 * @param xArr   array that is scaled by alpha
 * @param yArr   array that is added to the scaled values
 * @param alpha  scale factor applied to xArr
 *
 * All three arrays must have the same size (checked with assert). The kernel
 * is launched without an explicit stream and no synchronization is performed
 * here. Empty arrays are a no-op.
 */
template <typename T>
void saxpy(DArray<T>& zArr, DArray<T>& xArr, DArray<T>& yArr, T alpha)
{
    assert(zArr.size() == xArr.size() && zArr.size() == yArr.size());

    // Nothing to compute; also keeps us from ever requesting an empty grid.
    if (zArr.size() == 0)
        return;

    unsigned pDim = cudaGridSize(zArr.size(), BLOCK_SIZE);
    KerSaxpy <<<pDim, BLOCK_SIZE>>> (zArr.begin(), xArr.begin(), yArr.begin(), alpha, zArr.size());

    // A kernel launch does not return an error code. Surface a pending error
    // when asserts are enabled; peeking leaves the error state untouched for
    // any check the caller performs afterwards.
    assert(cudaPeekAtLastError() == cudaSuccess);
}
82
// Explicit instantiations of the host wrappers defined in this file, one per
// supported element type (presumably so other translation units can link
// against them without seeing the template definitions).

// plus: int, float, double
template void plus<int>(DArray<int>&, DArray<int>&, DArray<int>&);
template void plus<float>(DArray<float>&, DArray<float>&, DArray<float>&);
template void plus<double>(DArray<double>&, DArray<double>&, DArray<double>&);

// subtract: int, float, double
template void subtract<int>(DArray<int>&, DArray<int>&, DArray<int>&);
template void subtract<float>(DArray<float>&, DArray<float>&, DArray<float>&);
template void subtract<double>(DArray<double>&, DArray<double>&, DArray<double>&);

// multiply: int, float, double
template void multiply<int>(DArray<int>&, DArray<int>&, DArray<int>&);
template void multiply<float>(DArray<float>&, DArray<float>&, DArray<float>&);
template void multiply<double>(DArray<double>&, DArray<double>&, DArray<double>&);

// divide: int, float, double
template void divide<int>(DArray<int>&, DArray<int>&, DArray<int>&);
template void divide<float>(DArray<float>&, DArray<float>&, DArray<float>&);
template void divide<double>(DArray<double>&, DArray<double>&, DArray<double>&);

// saxpy: floating-point types only
template void saxpy<float>(DArray<float>&, DArray<float>&, DArray<float>&, float);
template void saxpy<double>(DArray<double>&, DArray<double>&, DArray<double>&, double);
101 }
102}