1#include <cuda_runtime.h>
2#include "SMAlgorithm.h"
3#include "Algorithm/Reduction.h"
4#include "Algorithm/Arithmetic.h"
5#include "Algorithm/Function2Pt.h"
// Kernel: every thread accumulates one partial sum for transposed(A)*a.
// The thread's flat global index tx is used as the key that is looked up in
// each map matrix_a[k]; when the key is present, the stored value is
// multiplied by a[k] and added to sum.
// NOTE(review): presumably matrix_a[k] holds row k of A keyed by column
// index, which makes the sum entry tx of transposed(A)*a - confirm.
10 //compute transposed(A)*a
11 template <typename VarType>
12 __global__ void transposedA_a(
13 DArrayMap<VarType> matrix_a,
// Flat 1D global thread index (one thread per looked-up key).
17 int tx = blockIdx.x*blockDim.x + threadIdx.x;
// Visit every element of a; k selects both the map and the vector entry.
22 for (int k = 0; k < a.size(); k++)
24 Map<int, VarType> map = matrix_a[k];
// The result of find() is compared against nullptr below: a null result
// means key tx is absent from this map and contributes nothing to sum.
27 auto pair_v = map.find(tx);
28 if (pair_v != nullptr)
29 sum += (pair_v->second)*a[k];
// Host-side launcher for the transposedA_a kernel.
// Parameters: matrix_a (sparse matrix as an array of maps), a (input vector),
// Aa (presumably the output vector receiving transposed(A)*a - confirm).
36 template<typename VarType>
37 void multiply_transposedSM_by_vector(DArrayMap<VarType>& matrix_a, DArray<VarType>& a, DArray<VarType>& Aa)
// Number of blocks is derived from a.size() and BLOCK_SIZE via cudaGridSize
// (presumably a ceil-division helper - confirm).
// NOTE(review): the kernel uses the thread index as the key looked up in each
// map, while a.size() is its loop bound. If the matrix is not square the
// number of result entries may differ from a.size() - confirm that sizing the
// grid by a.size() covers every output entry.
39 uint pDims = cudaGridSize(a.size(), BLOCK_SIZE);
// Launch pDims blocks of BLOCK_SIZE threads each.
40 transposedA_a << <pDims, BLOCK_SIZE >> > (
// Explicit instantiations of multiply_transposedSM_by_vector for float and
// double, so both variants are generated in this translation unit.
47 template void multiply_transposedSM_by_vector<float>(DArrayMap<float>& matrix_a, DArray<float>& a, DArray<float>& Aa);
48 template void multiply_transposedSM_by_vector<double>(DArrayMap<double>& matrix_a, DArray<double>& a, DArray<double>& Aa);
// Kernel: every thread takes the map stored at its own global index and
// accumulates, over all entries of that map, (entry value) * a[entry key].
// NOTE(review): presumably matrix_a[tx] is row tx of A keyed by column index,
// which makes the sum entry tx of A*a - confirm.
52 template <typename VarType>
54 DArrayMap<VarType> matrix_a,
// Flat 1D global thread index; it selects which map this thread processes.
58 int tx = blockIdx.x*blockDim.x + threadIdx.x;
63 Map<int, VarType> map = matrix_a[tx];
// Iterate over the stored entries of this map only.
67 for (auto pair_v = map.begin(); pair_v != map.end(); ++pair_v)
// The entry's key is used as the index into the vector a.
69 int key = pair_v->first;
70 sum += (pair_v->second)*a[key];
// Host-side launcher for the A_a kernel.
// Parameters: matrix_a (sparse matrix as an array of maps), a (input vector),
// Aa (presumably the output vector receiving A*a - confirm).
77 template<typename VarType>
78 void multiply_SM_by_vector(DArrayMap<VarType>& matrix_a, DArray<VarType>& a, DArray<VarType>& Aa)
// Number of blocks is derived from a.size() and BLOCK_SIZE via cudaGridSize
// (presumably a ceil-division helper - confirm).
// NOTE(review): the kernel indexes matrix_a by the thread index, while the
// grid is sized by a.size(). If the number of maps in matrix_a differs from
// a.size() (non-square matrix), some maps may be skipped or indexed out of
// range - confirm which size should drive the launch.
80 uint pDims = cudaGridSize(a.size(), BLOCK_SIZE);
// Launch pDims blocks of BLOCK_SIZE threads each.
81 A_a << <pDims, BLOCK_SIZE >> > (
// Explicit instantiations of multiply_SM_by_vector for float and double, so
// both variants are generated in this translation unit.
88 template void multiply_SM_by_vector<float>(DArrayMap<float>& matrix_a, DArray<float>& a, DArray<float>& Aa);
89 template void multiply_SM_by_vector<double>(DArrayMap<double>& matrix_a, DArray<double>& a, DArray<double>& Aa);