vendor/scs/linsys/gpu/gpu.c in scs-0.4.0 vs vendor/scs/linsys/gpu/gpu.c in scs-0.4.1

- old
+ new

@@ -17,17 +17,17 @@
   if (new_buffer_size > *buffer_size) {
     if (*buffer != SCS_NULL) {
       cudaFree(*buffer);
     }
-    cudaMalloc(buffer, *buffer_size);
+    cudaMalloc(buffer, new_buffer_size);
     *buffer_size = new_buffer_size;
   }
   CUSPARSE_GEN(SpMV)
   (cusparse_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &onef, Ag->descr, x,
-   &onef, y, SCS_CUDA_FLOAT, SCS_CSRMV_ALG, buffer);
+   &onef, y, SCS_CUDA_FLOAT, SCS_CSRMV_ALG, *buffer);
 }

 /* this is slow, use trans routine if possible */
 void SCS(accum_by_a_gpu)(const ScsGpuMatrix *Ag, const cusparseDnVecDescr_t x,
                          cusparseDnVecDescr_t y,
@@ -46,16 +46,16 @@
   if (new_buffer_size > *buffer_size) {
     if (*buffer != SCS_NULL) {
       cudaFree(*buffer);
     }
-    cudaMalloc(buffer, *buffer_size);
+    cudaMalloc(buffer, new_buffer_size);
     *buffer_size = new_buffer_size;
   }
   CUSPARSE_GEN(SpMV)
   (cusparse_handle, CUSPARSE_OPERATION_TRANSPOSE, &onef, Ag->descr, x, &onef, y,
-   SCS_CUDA_FLOAT, SCS_CSRMV_ALG, buffer);
+   SCS_CUDA_FLOAT, SCS_CSRMV_ALG, *buffer);
 }

 /* This assumes that P has been made full (ie not triangular) and uses the
  * fact that the GPU is faster for general sparse matrices than for symmetric */