PeriDyno 1.0.0
Loading...
Searching...
No Matches
SMAlgorithm.cu
Go to the documentation of this file.
1#include <cuda_runtime.h>
2#include "SMAlgorithm.h"
3#include "Algorithm/Reduction.h"
4#include "Algorithm/Arithmetic.h"
5#include "Algorithm/Function2Pt.h"
6
7
8namespace dyno
9{
10 //compute transposed(A)*a
11 template <typename VarType>
12 __global__ void transposedA_a(
13 DArrayMap<VarType> matrix_a,
14 DArray<VarType> a,
15 DArray<VarType> Aa)
16 {
17 int tx = blockIdx.x*blockDim.x + threadIdx.x;
18
19 if (tx < a.size())
20 {
21 VarType sum = 0;
22 for (int k = 0; k < a.size(); k++)
23 {
24 Map<int, VarType> map = matrix_a[k];
25 if (map.size() > 0)
26 {
27 auto pair_v = map.find(tx);
28 if (pair_v != nullptr)
29 sum += (pair_v->second)*a[k];
30 }
31 }
32 Aa[tx] = sum;
33 }
34 }
35
36 template<typename VarType>
37 void multiply_transposedSM_by_vector(DArrayMap<VarType>& matrix_a, DArray<VarType>& a, DArray<VarType>& Aa)
38 {
39 uint pDims = cudaGridSize(a.size(), BLOCK_SIZE);
40 transposedA_a << <pDims, BLOCK_SIZE >> > (
41 matrix_a,
42 a,
43 Aa);
44 cuSynchronize();
45 }
46
47 template void multiply_transposedSM_by_vector<float>(DArrayMap<float>& matrix_a, DArray<float>& a, DArray<float>& Aa);
48 template void multiply_transposedSM_by_vector<double>(DArrayMap<double>& matrix_a, DArray<double>& a, DArray<double>& Aa);
49
50
51 //compute A*a
52 template <typename VarType>
53 __global__ void A_a(
54 DArrayMap<VarType> matrix_a,
55 DArray<VarType> a,
56 DArray<VarType> Aa)
57 {
58 int tx = blockIdx.x*blockDim.x + threadIdx.x;
59
60 if (tx < a.size())
61 {
62 VarType sum = 0;
63 Map<int, VarType> map = matrix_a[tx];
64
65 if (map.size() > 0)
66 {
67 for (auto pair_v = map.begin(); pair_v != map.end(); ++pair_v)
68 {
69 int key = pair_v->first;
70 sum += (pair_v->second)*a[key];
71 }
72 }
73 Aa[tx] = sum;
74 }
75 }
76
77 template<typename VarType>
78 void multiply_SM_by_vector(DArrayMap<VarType>& matrix_a, DArray<VarType>& a, DArray<VarType>& Aa)
79 {
80 uint pDims = cudaGridSize(a.size(), BLOCK_SIZE);
81 A_a << <pDims, BLOCK_SIZE >> > (
82 matrix_a,
83 a,
84 Aa);
85 cuSynchronize();
86 }
87
88 template void multiply_SM_by_vector<float>(DArrayMap<float>& matrix_a, DArray<float>& a, DArray<float>& Aa);
89 template void multiply_SM_by_vector<double>(DArrayMap<double>& matrix_a, DArray<double>& a, DArray<double>& Aa);
90}