--- vendor/scs/linsys/gpu/gpu.c (scs-0.4.0)
+++ vendor/scs/linsys/gpu/gpu.c (scs-0.4.1)
@@ -17,17 +17,17 @@
if (new_buffer_size > *buffer_size) {
if (*buffer != SCS_NULL) {
cudaFree(*buffer);
}
- cudaMalloc(buffer, *buffer_size);
+ cudaMalloc(buffer, new_buffer_size);
*buffer_size = new_buffer_size;
}
CUSPARSE_GEN(SpMV)
(cusparse_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &onef, Ag->descr, x,
- &onef, y, SCS_CUDA_FLOAT, SCS_CSRMV_ALG, buffer);
+ &onef, y, SCS_CUDA_FLOAT, SCS_CSRMV_ALG, *buffer);
}
/* this is slow, use trans routine if possible */
void SCS(accum_by_a_gpu)(const ScsGpuMatrix *Ag, const cusparseDnVecDescr_t x,
cusparseDnVecDescr_t y,
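
Both changes in this hunk fix real bugs in the 0.4.0 buffer-reuse path. First, cudaMalloc was sized with the stale *buffer_size (which still holds the old capacity, typically 0 on the first call) instead of the freshly computed new_buffer_size, so the workspace could be allocated too small yet recorded as large enough for later calls. Second, cusparseSpMV's final externalBuffer parameter is a plain void *, so it must receive the device pointer *buffer, not the void ** that holds it. A minimal sketch of the corrected query/grow/SpMV pattern follows; the helper names and the CUSPARSE_SPMV_ALG_DEFAULT enum (whose spelling varies across toolkit versions, which is why SCS wraps it as SCS_CSRMV_ALG) are illustrative, not taken from SCS:

#include <cuda_runtime.h>
#include <cusparse.h>

/* Hypothetical helper mirroring the fixed logic in gpu.c: grow a reusable
 * cuSPARSE workspace only when the required size exceeds capacity. */
static void grow_buffer(void **buffer, size_t *buffer_size,
                        size_t new_buffer_size) {
  if (new_buffer_size > *buffer_size) {
    if (*buffer != NULL) {
      cudaFree(*buffer);
    }
    cudaMalloc(buffer, new_buffer_size); /* new size, not the stale one */
    *buffer_size = new_buffer_size;
  }
}

/* y += A * x with a reused workspace; the handle and descriptors are
 * assumed to have been created elsewhere. */
static void spmv_accum(cusparseHandle_t handle, cusparseSpMatDescr_t A,
                       cusparseDnVecDescr_t x, cusparseDnVecDescr_t y,
                       void **buffer, size_t *buffer_size) {
  const float one = 1.0f;
  size_t new_buffer_size = 0;
  cusparseSpMV_bufferSize(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &one, A,
                          x, &one, y, CUDA_R_32F, CUSPARSE_SPMV_ALG_DEFAULT,
                          &new_buffer_size);
  grow_buffer(buffer, buffer_size, new_buffer_size);
  cusparseSpMV(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &one, A, x, &one, y,
               CUDA_R_32F, CUSPARSE_SPMV_ALG_DEFAULT,
               *buffer); /* device pointer, not its address */
}
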
@@ -46,16 +46,16 @@
if (new_buffer_size > *buffer_size) {
if (*buffer != SCS_NULL) {
cudaFree(*buffer);
}
- cudaMalloc(buffer, *buffer_size);
+ cudaMalloc(buffer, new_buffer_size);
*buffer_size = new_buffer_size;
}
CUSPARSE_GEN(SpMV)
(cusparse_handle, CUSPARSE_OPERATION_TRANSPOSE, &onef, Ag->descr, x, &onef, y,
- SCS_CUDA_FLOAT, SCS_CSRMV_ALG, buffer);
+ SCS_CUDA_FLOAT, SCS_CSRMV_ALG, *buffer);
}
/* This assumes that P has been made full (ie not triangular) and uses the
* fact that the GPU is faster for general sparse matrices than for symmetric
*/
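
The second hunk applies the same pair of fixes to the transpose path. It is worth noting why neither bug produced a compiler diagnostic: cudaMalloc legitimately takes a void ** and a size_t, so passing the stale size type-checks, and a void ** converts implicitly to cusparseSpMV's void * parameter. A tiny standalone sketch of that second, silent conversion (names hypothetical, no CUDA required to compile):

#include <stdio.h>

/* Stand-in for an API whose last parameter is an opaque workspace,
 * like cusparseSpMV's void *externalBuffer. */
static void consume_workspace(void *workspace) {
  printf("workspace = %p\n", workspace);
}

int main(void) {
  void *device_buffer = 0; /* would be a cudaMalloc'd device pointer */
  void **holder = &device_buffer;

  consume_workspace(*holder); /* correct: the buffer itself (0.4.1) */
  consume_workspace(holder);  /* also compiles: address of the holder,
                                 the silent bug in 0.4.0 */
  return 0;
}
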