vendor/scs/linsys/gpu/gpu.c in scs-0.2.2 vs vendor/scs/linsys/gpu/gpu.c in scs-0.2.3
- old
+ new
@@ -1,34 +1,73 @@
#include "gpu.h"
-void SCS(_accum_by_atrans_gpu)(const ScsGpuMatrix *Ag, const scs_float *x,
- scs_float *y, cusparseHandle_t cusparse_handle) {
+void SCS(_accum_by_atrans_gpu)(const ScsGpuMatrix *Ag, const cusparseDnVecDescr_t x,
+ cusparseDnVecDescr_t y, cusparseHandle_t cusparse_handle,
+ size_t *buffer_size, void **buffer) { /* buffer_size/buffer: in/out cached SpMV workspace, grown here on demand */
/* y += A'*x
x and y MUST be on GPU already
*/
const scs_float onef = 1.0;
- CUSPARSE(csrmv)
- (cusparse_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, Ag->n, Ag->m, Ag->Annz,
- &onef, Ag->descr, Ag->x, Ag->p, Ag->i, x, &onef, y);
+ size_t new_buffer_size = 0; /* workspace bytes cuSPARSE reports for this SpMV */
+
+ CUSPARSE_GEN(SpMV_bufferSize) /* query the workspace size before running the product */
+ (cusparse_handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
+ &onef, Ag->descr, x, &onef, y,
+ SCS_CUDA_FLOAT, SCS_CSRMV_ALG,
+ &new_buffer_size);
+
+ if (new_buffer_size > *buffer_size) { /* cached workspace is too small: replace it */
+ if (*buffer != SCS_NULL) {
+ cudaFree(*buffer);
+ }
+ cudaMalloc(buffer, new_buffer_size); /* allocate the size just requested, not the stale cached one */
+ *buffer_size = new_buffer_size;
+ }
+
+ CUSPARSE_GEN(SpMV) /* alpha = beta = onef, so the result is accumulated into y */
+ (cusparse_handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
+ &onef, Ag->descr, x, &onef, y,
+ SCS_CUDA_FLOAT, SCS_CSRMV_ALG,
+ *buffer); /* pass the workspace pointer itself, not the address of the caller's variable */
}
-void SCS(_accum_by_a_gpu)(const ScsGpuMatrix *Ag, const scs_float *x,
- scs_float *y, cusparseHandle_t cusparse_handle) {
+void SCS(_accum_by_a_gpu)(const ScsGpuMatrix *Ag, const cusparseDnVecDescr_t x,
+ cusparseDnVecDescr_t y, cusparseHandle_t cusparse_handle,
+ size_t *buffer_size, void **buffer) { /* buffer_size/buffer: in/out cached SpMV workspace, grown here on demand */
/* y += A*x
x and y MUST be on GPU already
*/
const scs_float onef = 1.0;
+ size_t new_buffer_size = 0; /* workspace bytes cuSPARSE reports for this SpMV */
+
/* The A matrix idx pointers must be ORDERED */
- CUSPARSE(csrmv)
- (cusparse_handle, CUSPARSE_OPERATION_TRANSPOSE, Ag->n, Ag->m, Ag->Annz, &onef,
- Ag->descr, Ag->x, Ag->p, Ag->i, x, &onef, y);
+
+ CUSPARSE_GEN(SpMV_bufferSize) /* query the workspace size before running the product */
+ (cusparse_handle, CUSPARSE_OPERATION_TRANSPOSE,
+ &onef, Ag->descr, x, &onef, y,
+ SCS_CUDA_FLOAT, SCS_CSRMV_ALG,
+ &new_buffer_size);
+
+ if (new_buffer_size > *buffer_size) { /* cached workspace is too small: replace it */
+ if (*buffer != SCS_NULL) {
+ cudaFree(*buffer);
+ }
+ cudaMalloc(buffer, new_buffer_size); /* allocate the size just requested, not the stale cached one */
+ *buffer_size = new_buffer_size;
+ }
+
+ CUSPARSE_GEN(SpMV) /* alpha = beta = onef, so the result is accumulated into y */
+ (cusparse_handle, CUSPARSE_OPERATION_TRANSPOSE,
+ &onef, Ag->descr, x, &onef, y,
+ SCS_CUDA_FLOAT, SCS_CSRMV_ALG,
+ *buffer); /* pass the workspace pointer itself, not the address of the caller's variable */
}
void SCS(free_gpu_matrix)(ScsGpuMatrix *A) { /* Releases the resources held by A's fields; the struct A itself is not freed here. */
cudaFree(A->x);
cudaFree(A->i);
cudaFree(A->p);
- cusparseDestroyMatDescr(A->descr);
+ cusparseDestroySpMat(A->descr); /* new side of the diff: descr is destroyed through the sparse-matrix (SpMat) API instead of the MatDescr API */
}
void SCS(normalize_a)(ScsMatrix *A, const ScsSettings *stgs, const ScsCone *k,
ScsScaling *scal) {
SCS(_normalize_a)(A, stgs, k, scal);