#pragma once
#include "matrixCUDA.cuh"
#include "vectorCUBLAS.cuh"
// cusparseSpMVAlg_t enum names differ between nvcc v11 and v12
#if defined(__CUDACC_VER_MAJOR__)
#if __CUDACC_VER_MAJOR__ == 12
#define ALGORITHM CUSPARSE_SPMV_ALG_DEFAULT
#else
#define ALGORITHM CUSPARSE_MV_ALG_DEFAULT
#endif
#else
#define ALGORITHM CUSPARSE_MV_ALG_DEFAULT
#endif
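// The ALGORITHM macro is what the SpMV code paths are expected to pass as the cusparseSpMVAlg_t
// argument. A minimal sketch of such a call site, assuming spHandle plus dense-vector descriptors
// dnVecX/dnVecY (placeholder names, not from this project) already exist:
//
//   double alpha = 1.0, beta = 0.0;
//   size_t spmvBufSize = 0;
//   void *spmvBuf = NULL;
//   cusparseErrCheck(cusparseSpMV_bufferSize(spHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha, spMatDescr, dnVecX,
//                                            &beta, dnVecY, CUDA_R_64F, ALGORITHM, &spmvBufSize));
//   cudaErrCheck(cudaMalloc(&spmvBuf, spmvBufSize));
//   cusparseErrCheck(cusparseSpMV(spHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha, spMatDescr, dnVecX,
//                                 &beta, dnVecY, CUDA_R_64F, ALGORITHM, spmvBuf));
//   cudaErrCheck(cudaFree(spmvBuf));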
class MatrixCUSPARSE : public MatrixGPU {
protected:
  cusparseSpMatDescr_t spMatDescr = NULL;
  void denseToCUSPARSE(double *m) {
    // temporary GPU copy of the dense host matrix
    cusparseDnMatDescr_t dnMatDescr;
    double *d_mat;
    cudaErrCheck(cudaMalloc((void **)&d_mat, h_rows * h_columns * sizeof(double)));
    cudaErrCheck(cudaMemcpy(d_mat, m, h_rows * h_columns * sizeof(double), cudaMemcpyHostToDevice));
    // cusparse dense and sparse matrices
    cusparseErrCheck(cusparseCreateDnMat(&dnMatDescr, h_rows, h_columns, h_columns, d_mat, CUDA_R_64F, CUSPARSE_ORDER_ROW));
    cusparseErrCheck(cusparseCreateCsr(&spMatDescr, h_rows, h_columns, 0, d_csrRowPtr, NULL, NULL, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I,
                                       CUSPARSE_INDEX_BASE_ZERO, CUDA_R_64F));
    cusparseErrCheck(cusparseDenseToSparse_bufferSize(spHandle, dnMatDescr, spMatDescr, CUSPARSE_DENSETOSPARSE_ALG_DEFAULT, &bufferSize));
    cudaErrCheck(cudaMalloc(&dBuffer, bufferSize));
    cusparseErrCheck(cusparseDenseToSparse_analysis(spHandle, dnMatDescr, spMatDescr, CUSPARSE_DENSETOSPARSE_ALG_DEFAULT, dBuffer));
    // query nnz found during analysis
    int64_t rows_tmp, cols_tmp, h_nnz_temp;
    cusparseErrCheck(cusparseSpMatGetSize(spMatDescr, &rows_tmp, &cols_tmp, &h_nnz_temp));
    h_nnz = (unsigned)h_nnz_temp;
    cudaErrCheck(cudaMalloc((void **)&d_csrColInd, h_nnz * sizeof(int)));
    cudaErrCheck(cudaMalloc((void **)&d_csrVal, h_nnz * sizeof(double)));
    // set CSR pointers and run the conversion
    cusparseErrCheck(cusparseCsrSetPointers(spMatDescr, d_csrRowPtr, d_csrColInd, d_csrVal));
    cusparseErrCheck(cusparseDenseToSparse_convert(spHandle, dnMatDescr, spMatDescr, CUSPARSE_DENSETOSPARSE_ALG_DEFAULT, dBuffer));
    // free GPU temporaries
    cusparseErrCheck(cusparseDestroyDnMat(dnMatDescr));
    cudaErrCheck(cudaFree(d_mat));
  }
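  // For reference: denseToCUSPARSE leaves the matrix in zero-based CSR form (CUSPARSE_INDEX_BASE_ZERO).
  // A small worked example (illustration only, not project data) for the 3x3 dense matrix
  //   | 5 0 0 |
  //   | 0 0 8 |
  //   | 2 6 0 |
  // would give, after conversion,
  //   d_csrRowPtr = { 0, 1, 2, 4 }           (row i occupies entries [d_csrRowPtr[i], d_csrRowPtr[i+1]))
  //   d_csrColInd = { 0, 2, 0, 1 }           (column index of each stored value)
  //   d_csrVal    = { 5.0, 8.0, 2.0, 6.0 }   (h_nnz = 4 values, scanned row by row)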
public:
  /** Constructors */
  MatrixCUSPARSE() : MatrixGPU(0, 0, 0u){};                                  // Default Constr.
  MatrixCUSPARSE(unsigned r, unsigned c) : MatrixGPU(r, c, 0u){};            // Constr. #1
  MatrixCUSPARSE(unsigned r, unsigned c, unsigned n) : MatrixGPU(r, c, n){}; // Constr. #2
  MatrixCUSPARSE(unsigned r, unsigned c, double *m) : MatrixGPU(r, c, 0u) {  // Constr. #3
    // free resources that will be reallocated during the conversion
    cudaErrCheck(cudaFree(d_csrVal));
    cudaErrCheck(cudaFree(d_csrColInd));
    // dense to sparse
    denseToCUSPARSE(m);
  };
  MatrixCUSPARSE(const MatrixCUSPARSE &m) : MatrixGPU(m) { // Copy Constr.
    cusparseErrCheck(cusparseCreateCsr(&spMatDescr, h_rows, h_columns, h_nnz, d_csrRowPtr, d_csrColInd, d_csrVal, CUSPARSE_INDEX_32I,
                                       CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_R_64F));
  };
  MatrixCUSPARSE(MatrixCUSPARSE &&m) noexcept : MatrixGPU(std::move(m)) { // Move Constr.
    // free the source's descriptor and give it a placeholder over its remaining pointers
    cusparseErrCheck(cusparseDestroySpMat(m.spMatDescr));
    cusparseErrCheck(cusparseCreateCsr(&m.spMatDescr, m.h_rows, m.h_columns, m.h_nnz, m.d_csrRowPtr, m.d_csrColInd, m.d_csrVal, CUSPARSE_INDEX_32I,
                                       CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_R_64F));
    // build this object's descriptor over the device arrays taken from the source
    cusparseErrCheck(cusparseCreateCsr(&spMatDescr, h_rows, h_columns, h_nnz, d_csrRowPtr, d_csrColInd, d_csrVal, CUSPARSE_INDEX_32I,
                                       CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_R_64F));
  };
  /** Destructor */
  ~MatrixCUSPARSE() { if (spMatDescr) cusparseErrCheck(cusparseDestroySpMat(spMatDescr)); };
  /** Assignments */
  MatrixCUSPARSE &operator=(const MatrixCUSPARSE &m) { // Copy Assignment
    if (this == &m) return *this; // guard against self-assignment
    // free + reallocate only the buffers whose sizes changed
    if (h_rows != m.h_rows) {
      h_rows = m.h_rows;
      cudaErrCheck(cudaMemcpy(d_rows, m.d_rows, sizeof(unsigned), cudaMemcpyDeviceToDevice));
      cudaErrCheck(cudaFree(d_csrRowPtr));
      cudaErrCheck(cudaMalloc((void **)&d_csrRowPtr, sizeof(int) * (m.h_rows + 1)));
    }
    if (h_nnz != m.h_nnz) {
      h_nnz = m.h_nnz;
      cudaErrCheck(cudaFree(d_csrVal));
      cudaErrCheck(cudaFree(d_csrColInd));
      cudaErrCheck(cudaMalloc((void **)&d_csrVal, sizeof(double) * h_nnz));
      cudaErrCheck(cudaMalloc((void **)&d_csrColInd, sizeof(int) * h_nnz));
    }
    if (h_columns != m.h_columns) {
      h_columns = m.h_columns;
      cudaErrCheck(cudaMemcpy(d_columns, m.d_columns, sizeof(unsigned), cudaMemcpyDeviceToDevice));
    }
    // copy to device
    cudaErrCheck(cudaMemcpy(d_csrVal, m.d_csrVal, h_nnz * sizeof(double), cudaMemcpyDeviceToDevice));
    cudaErrCheck(cudaMemcpy(d_csrColInd, m.d_csrColInd, h_nnz * sizeof(int), cudaMemcpyDeviceToDevice));
    cudaErrCheck(cudaMemcpy(d_csrRowPtr, m.d_csrRowPtr, (h_rows + 1) * sizeof(int), cudaMemcpyDeviceToDevice));
    // cusparse: release the previous descriptor, then describe the copied arrays
    if (spMatDescr) cusparseErrCheck(cusparseDestroySpMat(spMatDescr));
    cusparseErrCheck(cusparseCreateCsr(&spMatDescr, h_rows, h_columns, h_nnz, d_csrRowPtr, d_csrColInd, d_csrVal, CUSPARSE_INDEX_32I,
                                       CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_R_64F));
    return *this;
  };
  MatrixCUSPARSE &operator=(MatrixCUSPARSE &&m) noexcept { // Move Assignment
    // reuse the copy assignment, then neutralize the source
    *this = m;
    m.h_rows = ZERO;
    m.h_nnz = ZERO;
    m.h_columns = ZERO;
    // the source's memory is released by its destructor; zeroing rows/cols/nnz keeps stale sizes from being used
    return *this;
  };
  /** Operator overloads */
  VectorCUDA operator*(VectorCUDA &v); // Multiplication
  VectorCUBLAS operator*(VectorCUBLAS &v);
  /** Member Functions */
  MatrixCUSPARSE transpose();
  double Dnrm2();
  template <typename T> friend void SpMV(MatrixCUSPARSE &M, T &v, T &out);
};
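// A minimal usage sketch (illustration only, not part of the original header): build a MatrixCUSPARSE
// from a row-major dense host array via Constr. #3 and exercise the members declared above. The values
// mirror the CSR example inside the class; the handle setup provided elsewhere in the project is assumed
// to already be in place. With a VectorCUDA or VectorCUBLAS operand (construction depends on the
// corresponding headers), A * x would dispatch to the SpMV-backed operator* overloads.
//
//   double h_dense[9] = {5.0, 0.0, 0.0,
//                        0.0, 0.0, 8.0,
//                        2.0, 6.0, 0.0};
//   MatrixCUSPARSE A(3u, 3u, h_dense);  // dense -> CSR on the device
//   MatrixCUSPARSE B = A;               // deep copy: new descriptor over copied CSR arrays
//   MatrixCUSPARSE At = A.transpose();  // declared above, defined in the accompanying .cu
//   double nrm = A.Dnrm2();             // declared above, defined in the accompanying .cu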