vendor/scs/linsys/gpu/gpu.h in scs-0.2.3 vs vendor/scs/linsys/gpu/gpu.h in scs-0.3.0

- old
+ new

@@ -8,53 +8,62 @@ #include <cublas_v2.h> #include <cuda.h> #include <cuda_runtime_api.h> #include <cusparse.h> -#include "amatrix.h" #include "glbopts.h" #include "linalg.h" #include "linsys.h" #include "scs.h" +#include "scs_matrix.h" #include "util.h" -#define CUDA_CHECK_ERR \ - do { \ - cudaError_t err = cudaGetLastError(); \ - if (err != cudaSuccess) { \ - printf("%s:%d:%s\n ERROR_CUDA: %s\n", __FILE__, __LINE__, __func__, \ - cudaGetErrorString(err)); \ - } \ +#define CUDA_CHECK_ERR \ + do { \ + cudaDeviceSynchronize(); \ + cudaError_t err = cudaGetLastError(); \ + if (err != cudaSuccess) { \ + scs_printf("%s:%d:%s\n ERROR_CUDA (#): %s\n", __FILE__, __LINE__, \ + __func__, cudaGetErrorString(err)); \ + } \ } while (0) -#ifndef EXTRA_VERBOSE +#if VERBOSITY == 0 #ifndef SFLOAT #define CUBLAS(x) cublasD##x +#define CUBLASI(x) cublasId##x #define CUSPARSE(x) cusparseD##x #else #define CUBLAS(x) cublasS##x +#define CUBLASI(x) cublasIs##x #define CUSPARSE(x) cusparseS##x #endif #define CUSPARSE_GEN(x) cusparse##x #else #ifndef SFLOAT -#define CUBLAS(x) \ - CUDA_CHECK_ERR; \ +#define CUBLAS(x) \ + CUDA_CHECK_ERR; \ cublasD##x -#define CUSPARSE(x) \ - CUDA_CHECK_ERR; \ +#define CUBLASI(x) \ + CUDA_CHECK_ERR; \ + cublasId##x +#define CUSPARSE(x) \ + CUDA_CHECK_ERR; \ cusparseD##x #else -#define CUBLAS(x) \ - CUDA_CHECK_ERR; \ +#define CUBLAS(x) \ + CUDA_CHECK_ERR; \ cublasS##x -#define CUSPARSE(x) \ - CUDA_CHECK_ERR; \ +#define CUBLASI(x) \ + CUDA_CHECK_ERR; \ + cublasIs##x +#define CUSPARSE(x) \ + CUDA_CHECK_ERR; \ cusparseS##x #endif -#define CUSPARSE_GEN(x) \ - CUDA_CHECK_ERR; \ +#define CUSPARSE_GEN(x) \ + CUDA_CHECK_ERR; \ cusparse##x #endif #ifndef SFLOAT #define SCS_CUDA_FLOAT CUDA_R_64F @@ -76,28 +85,36 @@ CSC CSR GPU Mult A (m x n) A' (n x m) Ag accum_by_a_trans_gpu A'(n x m) A (m x n) Agt accum_by_a_gpu */ -/* this struct defines the data matrix A on GPU */ -typedef struct SCS_GPU_A_DATA_MATRIX { +/* this struct defines the data matrix on GPU */ +typedef struct SCS_GPU_DATA_MATRIX { /* A is supplied in column compressed format */ - scs_float *x; /* A values, size: NNZ A */ - scs_int *i; /* A row index, size: NNZ A */ - scs_int *p; /* A column pointer, size: n+1 */ + scs_float *x; /* values, size: NNZ */ + scs_int *i; /* row index, size: NNZ */ + scs_int *p; /* column pointer, size: n+1 */ scs_int m, n; /* m rows, n cols */ - scs_int Annz; /* num non-zeros in A matrix */ + scs_int nnz; /* num non-zeros in matrix */ /* CUDA */ cusparseSpMatDescr_t descr; } ScsGpuMatrix; -void SCS(_accum_by_atrans_gpu)(const ScsGpuMatrix *A, const cusparseDnVecDescr_t x, - cusparseDnVecDescr_t y, cusparseHandle_t cusparse_handle, - size_t *buffer_size, void **buffer); +void SCS(accum_by_atrans_gpu)(const ScsGpuMatrix *A, + const cusparseDnVecDescr_t x, + cusparseDnVecDescr_t y, + cusparseHandle_t cusparse_handle, + size_t *buffer_size, void **buffer); -void SCS(_accum_by_a_gpu)(const ScsGpuMatrix *A, const cusparseDnVecDescr_t x, - cusparseDnVecDescr_t y, cusparseHandle_t cusparse_handle, - size_t *buffer_size, void **buffer); +void SCS(accum_by_a_gpu)(const ScsGpuMatrix *A, const cusparseDnVecDescr_t x, + cusparseDnVecDescr_t y, + cusparseHandle_t cusparse_handle, size_t *buffer_size, + void **buffer); + +void SCS(accum_by_p_gpu)(const ScsGpuMatrix *P, const cusparseDnVecDescr_t x, + cusparseDnVecDescr_t y, + cusparseHandle_t cusparse_handle, size_t *buffer_size, + void **buffer); void SCS(free_gpu_matrix)(ScsGpuMatrix *A); #ifdef __cplusplus }