#include "gpu.h"

/*
 * Shared implementation of the sparse accumulate routines below:
 *
 *     y += op(M) * x
 *
 * where M is the sparse matrix described by Mg->descr and op selects the
 * cuSPARSE operation (transpose / non-transpose). x and y MUST already be on
 * the GPU.
 *
 * buffer / buffer_size describe a caller-owned device workspace that is
 * reused across calls. It is grown (never shrunk) when cuSPARSE asks for
 * more space than is currently recorded in *buffer_size.
 *
 * The public entry points return void, so an allocation failure cannot be
 * reported to the caller. In that case the workspace is left in a consistent
 * empty state (*buffer == SCS_NULL, *buffer_size == 0) and y is left
 * untouched rather than launching SpMV with an invalid workspace.
 */
static void accum_spmv_gpu(const ScsGpuMatrix *Mg, cusparseOperation_t op,
                           const cusparseDnVecDescr_t x,
                           cusparseDnVecDescr_t y,
                           cusparseHandle_t cusparse_handle,
                           size_t *buffer_size, void **buffer) {
  /* alpha = beta = 1 turns y = alpha*op(M)*x + beta*y into an accumulate */
  const scs_float onef = 1.0;
  size_t new_buffer_size = 0;

  CUSPARSE_GEN(SpMV_bufferSize)
  (cusparse_handle, op, &onef, Mg->descr, x, &onef, y, SCS_CUDA_FLOAT,
   SCS_CSRMV_ALG, &new_buffer_size);

  if (new_buffer_size > *buffer_size) {
    if (*buffer != SCS_NULL) {
      cudaFree(*buffer);
    }
    /* Drop the stale pointer so a failed allocation cannot leave it dangling */
    *buffer = SCS_NULL;
    *buffer_size = 0;
    /* Allocate the size cuSPARSE asked for, not the old recorded size */
    if (cudaMalloc(buffer, new_buffer_size) != cudaSuccess) {
      *buffer = SCS_NULL;
      return;
    }
    *buffer_size = new_buffer_size;
  }

  /* SpMV expects the device workspace itself, i.e. *buffer, not the host
   * address of the pointer variable. */
  CUSPARSE_GEN(SpMV)
  (cusparse_handle, op, &onef, Mg->descr, x, &onef, y, SCS_CUDA_FLOAT,
   SCS_CSRMV_ALG, *buffer);
}

/*
 * y += A'*x
 * x and y MUST be on GPU already.
 *
 * Issued as a NON_TRANSPOSE product on Ag->descr, so the descriptor
 * presumably holds A' (e.g. CSC A viewed as CSR) -- confirm where the
 * descriptor is created.
 */
void SCS(accum_by_atrans_gpu)(const ScsGpuMatrix *Ag,
                              const cusparseDnVecDescr_t x,
                              cusparseDnVecDescr_t y,
                              cusparseHandle_t cusparse_handle,
                              size_t *buffer_size, void **buffer) {
  accum_spmv_gpu(Ag, CUSPARSE_OPERATION_NON_TRANSPOSE, x, y, cusparse_handle,
                 buffer_size, buffer);
}

/*
 * y += A*x
 * x and y MUST be on GPU already.
 *
 * This is slow, use the trans routine if possible.
 * The A matrix idx pointers must be ORDERED.
 */
void SCS(accum_by_a_gpu)(const ScsGpuMatrix *Ag, const cusparseDnVecDescr_t x,
                         cusparseDnVecDescr_t y,
                         cusparseHandle_t cusparse_handle, size_t *buffer_size,
                         void **buffer) {
  accum_spmv_gpu(Ag, CUSPARSE_OPERATION_TRANSPOSE, x, y, cusparse_handle,
                 buffer_size, buffer);
}

/*
 * y += P*x
 * x and y MUST be on GPU already.
 *
 * This assumes that P has been made full (ie not triangular) and uses the
 * fact that the GPU is faster for general sparse matrices than for symmetric.
 * With P full (hence symmetric), P'*x == P*x, so the faster A'-style routine
 * is reused.
 */
void SCS(accum_by_p_gpu)(const ScsGpuMatrix *Pg, const cusparseDnVecDescr_t x,
                         cusparseDnVecDescr_t y,
                         cusparseHandle_t cusparse_handle, size_t *buffer_size,
                         void **buffer) {
  SCS(accum_by_atrans_gpu)(Pg, x, y, cusparse_handle, buffer_size, buffer);
}

/*
 * Releases the device arrays (x, i, p) of a GPU matrix and destroys its
 * cuSPARSE sparse-matrix descriptor. The ScsGpuMatrix struct itself is not
 * freed here.
 */
void SCS(free_gpu_matrix)(ScsGpuMatrix *A) {
  cudaFree(A->x);
  cudaFree(A->i);
  cudaFree(A->p);
  cusparseDestroySpMat(A->descr);
}