ext/nmatrix/util/math.cpp in nmatrix-0.0.2 vs ext/nmatrix/util/math.cpp in nmatrix-0.0.3

- old
+ new

@@ -125,22 +125,35 @@ extern "C" { #ifdef HAVE_CLAPACK_H #include <clapack.h> #endif + static VALUE nm_cblas_rot(VALUE self, VALUE n, VALUE x, VALUE incx, VALUE y, VALUE incy, VALUE c, VALUE s); + static VALUE nm_cblas_rotg(VALUE self, VALUE ab); + static VALUE nm_cblas_gemm(VALUE self, VALUE order, VALUE trans_a, VALUE trans_b, VALUE m, VALUE n, VALUE k, VALUE vAlpha, VALUE a, VALUE lda, VALUE b, VALUE ldb, VALUE vBeta, VALUE c, VALUE ldc); - static VALUE nm_cblas_gemv(VALUE self, VALUE trans_a, VALUE m, VALUE n, VALUE vAlpha, VALUE a, VALUE lda, VALUE x, VALUE incx, VALUE vBeta, VALUE y, VALUE incy); - static VALUE nm_cblas_trsm(VALUE self, VALUE order, VALUE side, VALUE uplo, VALUE trans_a, VALUE diag, VALUE m, VALUE n, VALUE vAlpha, VALUE a, VALUE lda, VALUE b, VALUE ldb); + static VALUE nm_cblas_trmm(VALUE self, VALUE order, VALUE side, VALUE uplo, VALUE trans_a, VALUE diag, VALUE m, VALUE n, + VALUE alpha, VALUE a, VALUE lda, VALUE b, VALUE ldb); + static VALUE nm_cblas_herk(VALUE self, VALUE order, VALUE uplo, VALUE trans, VALUE n, VALUE k, VALUE alpha, VALUE a, + VALUE lda, VALUE beta, VALUE c, VALUE ldc); + static VALUE nm_cblas_syrk(VALUE self, VALUE order, VALUE uplo, VALUE trans, VALUE n, VALUE k, VALUE alpha, VALUE a, + VALUE lda, VALUE beta, VALUE c, VALUE ldc); static VALUE nm_clapack_getrf(VALUE self, VALUE order, VALUE m, VALUE n, VALUE a, VALUE lda); - + static VALUE nm_clapack_potrf(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE a, VALUE lda); + static VALUE nm_clapack_getrs(VALUE self, VALUE order, VALUE trans, VALUE n, VALUE nrhs, VALUE a, VALUE lda, VALUE ipiv, VALUE b, VALUE ldb); + static VALUE nm_clapack_potrs(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE nrhs, VALUE a, VALUE lda, VALUE b, VALUE ldb); + static VALUE nm_clapack_getri(VALUE self, VALUE order, VALUE n, VALUE a, VALUE lda, VALUE ipiv); + static VALUE nm_clapack_potri(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE a, VALUE lda); + static VALUE nm_clapack_laswp(VALUE self, VALUE n, VALUE a, VALUE lda, VALUE k1, VALUE k2, VALUE ipiv, VALUE incx); static VALUE nm_clapack_scal(VALUE self, VALUE n, VALUE scale, VALUE vector, VALUE incx); + static VALUE nm_clapack_lauum(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE a, VALUE lda); } // end of extern "C" block //////////////////// // Math Functions // @@ -236,11 +249,42 @@ trsm<DType>(order, side, uplo, trans_a, diag, m, n, *reinterpret_cast<const DType*>(alpha), reinterpret_cast<const DType*>(a), lda, reinterpret_cast<DType*>(b), ldb); } +/* + * Function signature conversion for calling CBLAS' trmm functions as directly as possible. + * + * For documentation: http://www.netlib.org/blas/dtrmm.f + */ +template <typename DType> +inline static void cblas_trmm(const enum CBLAS_ORDER order, const enum CBLAS_SIDE side, const enum CBLAS_UPLO uplo, + const enum CBLAS_TRANSPOSE ta, const enum CBLAS_DIAG diag, const int m, const int n, const void* alpha, + const void* A, const int lda, void* B, const int ldb) +{ + trmm<DType>(order, side, uplo, ta, diag, m, n, reinterpret_cast<const DType*>(alpha), + reinterpret_cast<const DType*>(A), lda, reinterpret_cast<DType*>(B), ldb); +} + +/* + * Function signature conversion for calling CBLAS' syrk functions as directly as possible. + * + * For documentation: http://www.netlib.org/blas/dsyrk.f + */ +template <typename DType> +inline static void cblas_syrk(const enum CBLAS_ORDER order, const enum CBLAS_UPLO uplo, const enum CBLAS_TRANSPOSE trans, + const int n, const int k, const void* alpha, + const void* A, const int lda, const void* beta, void* C, const int ldc) +{ + syrk<DType>(order, uplo, trans, n, k, reinterpret_cast<const DType*>(alpha), + reinterpret_cast<const DType*>(A), lda, reinterpret_cast<const DType*>(beta), reinterpret_cast<DType*>(C), ldc); +} + + + + }} // end of namespace nm::math extern "C" { @@ -250,17 +294,30 @@ void nm_math_init_blas() { cNMatrix_LAPACK = rb_define_module_under(cNMatrix, "LAPACK"); rb_define_singleton_method(cNMatrix_LAPACK, "clapack_getrf", (METHOD)nm_clapack_getrf, 5); - rb_define_singleton_method(cNMatrix_LAPACK, "clapack_scal", (METHOD)nm_clapack_scal, 4); + rb_define_singleton_method(cNMatrix_LAPACK, "clapack_potrf", (METHOD)nm_clapack_potrf, 5); + rb_define_singleton_method(cNMatrix_LAPACK, "clapack_getrs", (METHOD)nm_clapack_getrs, 9); + rb_define_singleton_method(cNMatrix_LAPACK, "clapack_potrs", (METHOD)nm_clapack_potrs, 8); + rb_define_singleton_method(cNMatrix_LAPACK, "clapack_getri", (METHOD)nm_clapack_getri, 5); + rb_define_singleton_method(cNMatrix_LAPACK, "clapack_potri", (METHOD)nm_clapack_potri, 5); + rb_define_singleton_method(cNMatrix_LAPACK, "clapack_laswp", (METHOD)nm_clapack_laswp, 7); + rb_define_singleton_method(cNMatrix_LAPACK, "clapack_scal", (METHOD)nm_clapack_scal, 4); + rb_define_singleton_method(cNMatrix_LAPACK, "clapack_lauum", (METHOD)nm_clapack_lauum, 5); cNMatrix_BLAS = rb_define_module_under(cNMatrix, "BLAS"); + rb_define_singleton_method(cNMatrix_BLAS, "cblas_rot", (METHOD)nm_cblas_rot, 7); + rb_define_singleton_method(cNMatrix_BLAS, "cblas_rotg", (METHOD)nm_cblas_rotg, 1); + rb_define_singleton_method(cNMatrix_BLAS, "cblas_gemm", (METHOD)nm_cblas_gemm, 14); rb_define_singleton_method(cNMatrix_BLAS, "cblas_gemv", (METHOD)nm_cblas_gemv, 11); rb_define_singleton_method(cNMatrix_BLAS, "cblas_trsm", (METHOD)nm_cblas_trsm, 12); + rb_define_singleton_method(cNMatrix_BLAS, "cblas_trmm", (METHOD)nm_cblas_trmm, 12); + rb_define_singleton_method(cNMatrix_BLAS, "cblas_syrk", (METHOD)nm_cblas_syrk, 11); + rb_define_singleton_method(cNMatrix_BLAS, "cblas_herk", (METHOD)nm_cblas_herk, 11); } /* Interprets cblas argument which could be any of false/:no_transpose, :transpose, or :complex_conjugate, * into an enum recognized by cblas. @@ -323,10 +380,143 @@ rb_raise(rb_eArgError, "Expected :row or :col for order argument"); return CblasRowMajor; } +/* + * Call any of the cblas_xrotg functions as directly as possible. + * + * xROTG computes the elements of a Givens plane rotation matrix such that: + * + * | c s | | a | | r | + * | -s c | * | b | = | 0 | + * + * where r = +- sqrt( a**2 + b**2 ) and c**2 + s**2 = 1. + * + * The Givens plane rotation can be used to introduce zero elements into a matrix selectively. + * + * This function differs from most of the other raw BLAS accessors. Instead of providing a, b, c, s as arguments, you + * should only provide a and b (the inputs), and you should provide them as a single NVector (or the first two elements + * of any dense NMatrix or NVector type, specifically). + * + * The outputs [c,s] will be returned in a Ruby Array at the end; the input NVector will also be modified in-place. + * + * If you provide rationals, be aware that there's a high probability of an error, since rotg includes a square root -- + * and most rationals' square roots are irrational. You're better off converting to Float first. + * + * This function, like the other cblas_ functions, does minimal type-checking. + */ +static VALUE nm_cblas_rotg(VALUE self, VALUE ab) { + static void (*ttable[nm::NUM_DTYPES])(void* a, void* b, void* c, void* s) = { + NULL, NULL, NULL, NULL, NULL, // can't represent c and s as integers, so no point in having integer operations. + nm::math::cblas_rotg<float>, + nm::math::cblas_rotg<double>, + nm::math::cblas_rotg<nm::Complex64>, + nm::math::cblas_rotg<nm::Complex128>, + nm::math::cblas_rotg<nm::Rational32>, + nm::math::cblas_rotg<nm::Rational64>, + nm::math::cblas_rotg<nm::Rational128>, + nm::math::cblas_rotg<nm::RubyObject> + }; + + nm::dtype_t dtype = NM_DTYPE(ab); + + if (!ttable[dtype]) { + rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices"); + return Qnil; + + } else { + void *pC = ALLOCA_N(char, DTYPE_SIZES[dtype]), + *pS = ALLOCA_N(char, DTYPE_SIZES[dtype]); + + // extract A and B from the NVector (first two elements) + void* pA = NM_STORAGE_DENSE(ab)->elements; + void* pB = (char*)(NM_STORAGE_DENSE(ab)->elements) + DTYPE_SIZES[dtype]; + // c and s are output + + ttable[dtype](pA, pB, pC, pS); + + VALUE result = rb_ary_new2(2); + rb_ary_store(result, 0, rubyobj_from_cval(pC, dtype).rval); + rb_ary_store(result, 1, rubyobj_from_cval(pS, dtype).rval); + + return result; + } +} + + +/* + * Call any of the cblas_xrot functions as directly as possible. + * + * xROT is a BLAS level 1 routine (taking two vectors) which applies a plane rotation. + * + * It's tough to find documentation on xROT. Here are what we think the arguments are for: + * * n :: number of elements to consider in x and y + * * x :: a vector (expects an NVector) + * * incx :: stride of x + * * y :: a vector (expects an NVector) + * * incy :: stride of y + * * c :: cosine of the angle of rotation + * * s :: sine of the angle of rotation + * + * Note that c and s will be the same dtype as x and y, except when x and y are complex. If x and y are complex, c and s + * will be float for Complex64 or double for Complex128. + * + * You probably don't want to call this function. Instead, why don't you try rot, which is more flexible + * with its arguments? + * + * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception + * handling, so you can easily crash Ruby! + */ +static VALUE nm_cblas_rot(VALUE self, VALUE n, VALUE x, VALUE incx, VALUE y, VALUE incy, VALUE c, VALUE s) { + static void (*ttable[nm::NUM_DTYPES])(const int N, void*, const int, void*, const int, const void*, const void*) = { + NULL, NULL, NULL, NULL, NULL, // can't represent c and s as integers, so no point in having integer operations. + nm::math::cblas_rot<float,float>, + nm::math::cblas_rot<double,double>, + nm::math::cblas_rot<nm::Complex64,float>, + nm::math::cblas_rot<nm::Complex128,double>, + nm::math::cblas_rot<nm::Rational32,nm::Rational32>, + nm::math::cblas_rot<nm::Rational64,nm::Rational64>, + nm::math::cblas_rot<nm::Rational128,nm::Rational128>, + nm::math::cblas_rot<nm::RubyObject,nm::RubyObject> + }; + + nm::dtype_t dtype = NM_DTYPE(x); + + + if (!ttable[dtype]) { + rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices"); + return Qfalse; + } else { + void *pC, *pS; + + // We need to ensure the cosine and sine arguments are the correct dtype -- which may differ from the actual dtype. + if (dtype == nm::COMPLEX64) { + pC = ALLOCA_N(float,1); + pS = ALLOCA_N(float,1); + rubyval_to_cval(c, nm::FLOAT32, pC); + rubyval_to_cval(s, nm::FLOAT32, pS); + } else if (dtype == nm::COMPLEX128) { + pC = ALLOCA_N(double,1); + pS = ALLOCA_N(double,1); + rubyval_to_cval(c, nm::FLOAT64, pC); + rubyval_to_cval(s, nm::FLOAT64, pS); + } else { + pC = ALLOCA_N(char, DTYPE_SIZES[dtype]); + pS = ALLOCA_N(char, DTYPE_SIZES[dtype]); + rubyval_to_cval(c, dtype, pC); + rubyval_to_cval(s, dtype, pS); + } + + + ttable[dtype](FIX2INT(n), NM_STORAGE_DENSE(x)->elements, FIX2INT(incx), NM_STORAGE_DENSE(y)->elements, FIX2INT(incy), pC, pS); + + return Qtrue; + } +} + + /* Call any of the cblas_xgemm functions as directly as possible. * * The cblas_xgemm functions (dgemm, sgemm, cgemm, and zgemm) define the following operation: * * C = alpha*op(A)*op(B) + beta*C @@ -338,11 +528,11 @@ * expose the ultra-optimized ATLAS versions. * * == Arguments * See: http://www.netlib.org/blas/dgemm.f * - * You probably don't want to call this function. Instead, why don't you try cblas_gemm, which is more flexible + * You probably don't want to call this function. Instead, why don't you try gemm, which is more flexible * with its arguments? * * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception * handling, so you can easily crash Ruby! */ @@ -356,11 +546,11 @@ VALUE beta, VALUE c, VALUE ldc) { NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::cblas_gemm, void, const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE trans_a, const enum CBLAS_TRANSPOSE trans_b, int m, int n, int k, void* alpha, void* a, int lda, void* b, int ldb, void* beta, void* c, int ldc); - dtype_t dtype = NM_DTYPE(a); + nm::dtype_t dtype = NM_DTYPE(a); void *pAlpha = ALLOCA_N(char, DTYPE_SIZES[dtype]), *pBeta = ALLOCA_N(char, DTYPE_SIZES[dtype]); rubyval_to_cval(alpha, dtype, pAlpha); rubyval_to_cval(beta, dtype, pBeta); @@ -401,11 +591,11 @@ VALUE beta, VALUE y, VALUE incy) { NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::cblas_gemv, bool, const enum CBLAS_TRANSPOSE trans_a, int m, int n, void* alpha, void* a, int lda, void* x, int incx, void* beta, void* y, int incy); - dtype_t dtype = NM_DTYPE(a); + nm::dtype_t dtype = NM_DTYPE(a); void *pAlpha = ALLOCA_N(char, DTYPE_SIZES[dtype]), *pBeta = ALLOCA_N(char, DTYPE_SIZES[dtype]); rubyval_to_cval(alpha, dtype, pAlpha); rubyval_to_cval(beta, dtype, pBeta); @@ -423,11 +613,11 @@ VALUE a, VALUE lda, VALUE b, VALUE ldb) { static void (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const enum CBLAS_SIDE, const enum CBLAS_UPLO, const enum CBLAS_TRANSPOSE, const enum CBLAS_DIAG, - const int, const int, const void* alpha, const void* a, + const int m, const int n, const void* alpha, const void* a, const int lda, void* b, const int ldb) = { NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division nm::math::cblas_trsm<float>, nm::math::cblas_trsm<double>, cblas_ctrsm, cblas_ztrsm, // call directly, same function signature! @@ -435,28 +625,137 @@ nm::math::cblas_trsm<nm::Rational64>, nm::math::cblas_trsm<nm::Rational128>, nm::math::cblas_trsm<nm::RubyObject> }; - dtype_t dtype = NM_DTYPE(a); + nm::dtype_t dtype = NM_DTYPE(a); - void *pAlpha = ALLOCA_N(char, DTYPE_SIZES[dtype]); - rubyval_to_cval(alpha, dtype, pAlpha); + if (!ttable[dtype]) { + rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices"); + } else { + void *pAlpha = ALLOCA_N(char, DTYPE_SIZES[dtype]); + rubyval_to_cval(alpha, dtype, pAlpha); - ttable[dtype](blas_order_sym(order), blas_side_sym(side), blas_uplo_sym(uplo), blas_transpose_sym(trans_a), blas_diag_sym(diag), FIX2INT(m), FIX2INT(n), pAlpha, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NM_STORAGE_DENSE(b)->elements, FIX2INT(ldb)); + ttable[dtype](blas_order_sym(order), blas_side_sym(side), blas_uplo_sym(uplo), blas_transpose_sym(trans_a), blas_diag_sym(diag), FIX2INT(m), FIX2INT(n), pAlpha, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NM_STORAGE_DENSE(b)->elements, FIX2INT(ldb)); + } return Qtrue; } +static VALUE nm_cblas_trmm(VALUE self, + VALUE order, + VALUE side, VALUE uplo, + VALUE trans_a, VALUE diag, + VALUE m, VALUE n, + VALUE alpha, + VALUE a, VALUE lda, + VALUE b, VALUE ldb) +{ + static void (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, + const enum CBLAS_SIDE, const enum CBLAS_UPLO, + const enum CBLAS_TRANSPOSE, const enum CBLAS_DIAG, + const int m, const int n, const void* alpha, const void* a, + const int lda, void* b, const int ldb) = { + NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division + nm::math::cblas_trmm<float>, + nm::math::cblas_trmm<double>, + cblas_ctrmm, cblas_ztrmm // call directly, same function signature! + /* + nm::math::cblas_trmm<nm::Rational32>, + nm::math::cblas_trmm<nm::Rational64>, + nm::math::cblas_trmm<nm::Rational128>, + nm::math::cblas_trmm<nm::RubyObject>*/ + }; + + nm::dtype_t dtype = NM_DTYPE(a); + + if (!ttable[dtype]) { + rb_raise(nm_eDataTypeError, "this matrix operation not yet defined for non-BLAS dtypes"); + } else { + void *pAlpha = ALLOCA_N(char, DTYPE_SIZES[dtype]); + rubyval_to_cval(alpha, dtype, pAlpha); + + ttable[dtype](blas_order_sym(order), blas_side_sym(side), blas_uplo_sym(uplo), blas_transpose_sym(trans_a), blas_diag_sym(diag), FIX2INT(m), FIX2INT(n), pAlpha, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NM_STORAGE_DENSE(b)->elements, FIX2INT(ldb)); + } + + return b; +} + + +static VALUE nm_cblas_syrk(VALUE self, + VALUE order, + VALUE uplo, + VALUE trans, + VALUE n, VALUE k, + VALUE alpha, + VALUE a, VALUE lda, + VALUE beta, + VALUE c, VALUE ldc) +{ + static void (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const enum CBLAS_UPLO, const enum CBLAS_TRANSPOSE, + const int n, const int k, const void* alpha, const void* a, + const int lda, const void* beta, void* c, const int ldc) = { + NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division + nm::math::cblas_syrk<float>, + nm::math::cblas_syrk<double>, + cblas_csyrk, cblas_zsyrk// call directly, same function signature! + /*nm::math::cblas_trsm<nm::Rational32>, + nm::math::cblas_trsm<nm::Rational64>, + nm::math::cblas_trsm<nm::Rational128>, + nm::math::cblas_trsm<nm::RubyObject>*/ + }; + + nm::dtype_t dtype = NM_DTYPE(a); + + if (!ttable[dtype]) { + rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices"); + } else { + void *pAlpha = ALLOCA_N(char, DTYPE_SIZES[dtype]), + *pBeta = ALLOCA_N(char, DTYPE_SIZES[dtype]); + rubyval_to_cval(alpha, dtype, pAlpha); + rubyval_to_cval(beta, dtype, pBeta); + + ttable[dtype](blas_order_sym(order), blas_uplo_sym(uplo), blas_transpose_sym(trans), FIX2INT(n), FIX2INT(k), pAlpha, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), pBeta, NM_STORAGE_DENSE(c)->elements, FIX2INT(ldc)); + } + + return Qtrue; +} + + +static VALUE nm_cblas_herk(VALUE self, + VALUE order, + VALUE uplo, + VALUE trans, + VALUE n, VALUE k, + VALUE alpha, + VALUE a, VALUE lda, + VALUE beta, + VALUE c, VALUE ldc) +{ + + nm::dtype_t dtype = NM_DTYPE(a); + + if (dtype == nm::COMPLEX64) { + cblas_cherk(blas_order_sym(order), blas_uplo_sym(uplo), blas_transpose_sym(trans), FIX2INT(n), FIX2INT(k), NUM2DBL(alpha), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NUM2DBL(beta), NM_STORAGE_DENSE(c)->elements, FIX2INT(ldc)); + } else if (dtype == nm::COMPLEX128) { + cblas_zherk(blas_order_sym(order), blas_uplo_sym(uplo), blas_transpose_sym(trans), FIX2INT(n), FIX2INT(k), NUM2DBL(alpha), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NUM2DBL(beta), NM_STORAGE_DENSE(c)->elements, FIX2INT(ldc)); + } else + rb_raise(rb_eNotImpError, "this matrix operation undefined for non-complex dtypes"); + + + return Qtrue; +} + + /* * Based on LAPACK's dscal function, but for any dtype. * * In-place modification; returns the modified vector as well. */ static VALUE nm_clapack_scal(VALUE self, VALUE n, VALUE scale, VALUE vector, VALUE incx) { - dtype_t dtype = NM_DTYPE(vector); + nm::dtype_t dtype = NM_DTYPE(vector); void* da = ALLOCA_N(char, DTYPE_SIZES[dtype]); rubyval_to_cval(scale, dtype, da); NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::clapack_scal, void, const int n, const void* da, void* dx, const int incx); @@ -465,11 +764,47 @@ return vector; } -/* Call any of the clpack_xgetrf functions as directly as possible. +static VALUE nm_clapack_lauum(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE a, VALUE lda) { + static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const enum CBLAS_UPLO, const int n, void* a, const int lda) = { + /*nm::math::clapack_lauum<uint8_t, false>, + nm::math::clapack_lauum<int8_t, false>, + nm::math::clapack_lauum<int16_t, false>, + nm::math::clapack_lauum<uint32_t, false>, + nm::math::clapack_lauum<uint64_t, false>,*/ + NULL, NULL, NULL, NULL, NULL, + nm::math::clapack_lauum<false, float>, + nm::math::clapack_lauum<false, double>, +#ifdef HAVE_CLAPACK_H + clapack_clauum, clapack_zlauum, // call directly, same function signature! +#else // Especially important for Mac OS, which doesn't seem to include the ATLAS clapack interface. + nm::math::clapack_lauum<true, nm::Complex64>, + nm::math::clapack_lauum<true, nm::Complex128>, +#endif +/* + nm::math::clapack_lauum<nm::Rational32, false>, + nm::math::clapack_lauum<nm::Rational64, false>, + nm::math::clapack_lauum<nm::Rational128, false>, + nm::math::clapack_lauum<nm::RubyObject, false> + +*/ + }; + + if (!ttable[NM_DTYPE(a)]) { + rb_raise(rb_eNotImpError, "does not yet work for non-BLAS dtypes (needs herk, syrk, trmm)"); + } else { + // Call either our version of lauum or the LAPACK version. + ttable[NM_DTYPE(a)](blas_order_sym(order), blas_uplo_sym(uplo), FIX2INT(n), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda)); + } + + return a; +} + + +/* Call any of the clapack_xgetrf functions as directly as possible. * * The clapack_getrf functions (dgetrf, sgetrf, cgetrf, and zgetrf) compute an LU factorization of a general M-by-N * matrix A using partial pivoting with row interchanges. * * The factorization has the form: @@ -513,12 +848,16 @@ // Allocate the pivot index array, which is of size MIN(M, N). size_t ipiv_size = std::min(M,N); int* ipiv = ALLOCA_N(int, ipiv_size); - // Call either our version of getrf or the LAPACK version. - ttable[NM_DTYPE(a)](blas_order_sym(order), M, N, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), ipiv); + if (!ttable[NM_DTYPE(a)]) { + rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices"); + } else { + // Call either our version of getrf or the LAPACK version. + ttable[NM_DTYPE(a)](blas_order_sym(order), M, N, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), ipiv); + } // Result will be stored in a. We return ipiv as an array. VALUE ipiv_array = rb_ary_new2(ipiv_size); for (size_t i = 0; i < ipiv_size; ++i) { rb_ary_store(ipiv_array, i, INT2FIX(ipiv[i])); @@ -526,13 +865,291 @@ return ipiv_array; } +/* Call any of the clapack_xpotrf functions as directly as possible. + * + * You probably don't want to call this function. Instead, why don't you try clapack_potrf, which is more flexible + * with its arguments? + * + * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception + * handling, so you can easily crash Ruby! + * + * Returns an array giving the pivot indices (normally these are argument #5). + */ +static VALUE nm_clapack_potrf(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE a, VALUE lda) { +#ifndef HAVE_CLAPACK_H + rb_raise(rb_eNotImpError, "potrf currently requires LAPACK"); +#endif + + static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const enum CBLAS_UPLO, const int n, void* a, const int lda) = { + NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division + nm::math::clapack_potrf<float>, + nm::math::clapack_potrf<double>, +#ifdef HAVE_CLAPACK_H + clapack_cpotrf, clapack_zpotrf, // call directly, same function signature! +#else // Especially important for Mac OS, which doesn't seem to include the ATLAS clapack interface. + nm::math::clapack_potrf<nm::Complex64>, + nm::math::clapack_potrf<nm::Complex128>, +#endif + NULL, NULL, NULL, NULL /* + nm::math::clapack_potrf<nm::Rational32>, + nm::math::clapack_potrf<nm::Rational64>, + nm::math::clapack_potrf<nm::Rational128>, + nm::math::clapack_potrf<nm::RubyObject> */ + }; + + if (!ttable[NM_DTYPE(a)]) { + rb_raise(rb_eNotImpError, "this operation not yet implemented for non-BLAS dtypes"); + // FIXME: Once BLAS dtypes are implemented, replace error above with the error below. + //rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices"); + } else { + // Call either our version of potrf or the LAPACK version. + ttable[NM_DTYPE(a)](blas_order_sym(order), blas_uplo_sym(uplo), FIX2INT(n), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda)); + } + + return a; +} + + /* + * Call any of the clapack_xgetrs functions as directly as possible. + */ +static VALUE nm_clapack_getrs(VALUE self, VALUE order, VALUE trans, VALUE n, VALUE nrhs, VALUE a, VALUE lda, VALUE ipiv, VALUE b, VALUE ldb) { + static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE Trans, const int N, + const int NRHS, const void* A, const int lda, const int* ipiv, void* B, + const int ldb) = { + NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division + nm::math::clapack_getrs<float>, + nm::math::clapack_getrs<double>, +#ifdef HAVE_CLAPACK_H + clapack_cgetrs, clapack_zgetrs, // call directly, same function signature! +#else // Especially important for Mac OS, which doesn't seem to include the ATLAS clapack interface. + nm::math::clapack_getrs<nm::Complex64>, + nm::math::clapack_getrs<nm::Complex128>, +#endif + nm::math::clapack_getrs<nm::Rational32>, + nm::math::clapack_getrs<nm::Rational64>, + nm::math::clapack_getrs<nm::Rational128>, + nm::math::clapack_getrs<nm::RubyObject> + }; + + // Allocate the C version of the pivot index array + // TODO: Allow for an NVector here also, maybe? + int* ipiv_; + if (TYPE(ipiv) != T_ARRAY) { + rb_raise(rb_eArgError, "ipiv must be of type Array"); + } else { + ipiv_ = ALLOCA_N(int, RARRAY_LEN(ipiv)); + for (int index = 0; index < RARRAY_LEN(ipiv); ++index) { + ipiv_[index] = FIX2INT( RARRAY_PTR(ipiv)[index] ); + } + } + + if (!ttable[NM_DTYPE(a)]) { + rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices"); + } else { + + // Call either our version of getrs or the LAPACK version. + ttable[NM_DTYPE(a)](blas_order_sym(order), blas_transpose_sym(trans), FIX2INT(n), FIX2INT(nrhs), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), + ipiv_, NM_STORAGE_DENSE(b)->elements, FIX2INT(ldb)); + } + + // b is both returned and modified directly in the argument list. + return b; +} + + +/* + * Call any of the clapack_xpotrs functions as directly as possible. + */ +static VALUE nm_clapack_potrs(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE nrhs, VALUE a, VALUE lda, VALUE b, VALUE ldb) { + static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const int N, + const int NRHS, const void* A, const int lda, void* B, const int ldb) = { + NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division + nm::math::clapack_potrs<float,false>, + nm::math::clapack_potrs<double,false>, +#ifdef HAVE_CLAPACK_H + clapack_cpotrs, clapack_zpotrs, // call directly, same function signature! +#else // Especially important for Mac OS, which doesn't seem to include the ATLAS clapack interface. + nm::math::clapack_potrs<nm::Complex64,true>, + nm::math::clapack_potrs<nm::Complex128,true>, +#endif + nm::math::clapack_potrs<nm::Rational32,false>, + nm::math::clapack_potrs<nm::Rational64,false>, + nm::math::clapack_potrs<nm::Rational128,false>, + nm::math::clapack_potrs<nm::RubyObject,false> + }; + + + if (!ttable[NM_DTYPE(a)]) { + rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices"); + } else { + + // Call either our version of potrs or the LAPACK version. + ttable[NM_DTYPE(a)](blas_order_sym(order), blas_uplo_sym(uplo), FIX2INT(n), FIX2INT(nrhs), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), + NM_STORAGE_DENSE(b)->elements, FIX2INT(ldb)); + } + + // b is both returned and modified directly in the argument list. + return b; +} + + +/* Call any of the clapack_xgetri functions as directly as possible. + * + * You probably don't want to call this function. Instead, why don't you try clapack_getri, which is more flexible + * with its arguments? + * + * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception + * handling, so you can easily crash Ruby! + * + * Returns an array giving the pivot indices (normally these are argument #5). + */ +static VALUE nm_clapack_getri(VALUE self, VALUE order, VALUE n, VALUE a, VALUE lda, VALUE ipiv) { +#ifndef HAVE_CLAPACK_H + rb_raise(rb_eNotImpError, "getri currently requires LAPACK"); +#endif + + static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const int n, void* a, const int lda, const int* ipiv) = { + NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division + nm::math::clapack_getri<float>, + nm::math::clapack_getri<double>, +#ifdef HAVE_CLAPACK_H + clapack_cgetri, clapack_zgetri, // call directly, same function signature! +#else // Especially important for Mac OS, which doesn't seem to include the ATLAS clapack interface. + nm::math::clapack_getri<nm::Complex64>, + nm::math::clapack_getri<nm::Complex128>, +#endif + NULL, NULL, NULL, NULL /* + nm::math::clapack_getri<nm::Rational32>, + nm::math::clapack_getri<nm::Rational64>, + nm::math::clapack_getri<nm::Rational128>, + nm::math::clapack_getri<nm::RubyObject> */ + }; + + // Allocate the C version of the pivot index array + // TODO: Allow for an NVector here also, maybe? + int* ipiv_; + if (TYPE(ipiv) != T_ARRAY) { + rb_raise(rb_eArgError, "ipiv must be of type Array"); + } else { + ipiv_ = ALLOCA_N(int, RARRAY_LEN(ipiv)); + for (int index = 0; index < RARRAY_LEN(ipiv); ++index) { + ipiv_[index] = FIX2INT( RARRAY_PTR(ipiv)[index] ); + } + } + + if (!ttable[NM_DTYPE(a)]) { + rb_raise(rb_eNotImpError, "this operation not yet implemented for non-BLAS dtypes"); + // FIXME: Once BLAS dtypes are implemented, replace error above with the error below. + //rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices"); + } else { + // Call either our version of getri or the LAPACK version. + ttable[NM_DTYPE(a)](blas_order_sym(order), FIX2INT(n), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), ipiv_); + } + + return a; +} + + +/* Call any of the clapack_xpotri functions as directly as possible. + * + * You probably don't want to call this function. Instead, why don't you try clapack_potri, which is more flexible + * with its arguments? + * + * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception + * handling, so you can easily crash Ruby! + * + * Returns an array giving the pivot indices (normally these are argument #5). + */ +static VALUE nm_clapack_potri(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE a, VALUE lda) { +#ifndef HAVE_CLAPACK_H + rb_raise(rb_eNotImpError, "getri currently requires LAPACK"); +#endif + + static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const enum CBLAS_UPLO, const int n, void* a, const int lda) = { + NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division + nm::math::clapack_potri<float>, + nm::math::clapack_potri<double>, +#ifdef HAVE_CLAPACK_H + clapack_cpotri, clapack_zpotri, // call directly, same function signature! +#else // Especially important for Mac OS, which doesn't seem to include the ATLAS clapack interface. + nm::math::clapack_potri<nm::Complex64>, + nm::math::clapack_potri<nm::Complex128>, +#endif + NULL, NULL, NULL, NULL /* + nm::math::clapack_getri<nm::Rational32>, + nm::math::clapack_getri<nm::Rational64>, + nm::math::clapack_getri<nm::Rational128>, + nm::math::clapack_getri<nm::RubyObject> */ + }; + + if (!ttable[NM_DTYPE(a)]) { + rb_raise(rb_eNotImpError, "this operation not yet implemented for non-BLAS dtypes"); + // FIXME: Once BLAS dtypes are implemented, replace error above with the error below. + //rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices"); + } else { + // Call either our version of getri or the LAPACK version. + ttable[NM_DTYPE(a)](blas_order_sym(order), blas_uplo_sym(uplo), FIX2INT(n), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda)); + } + + return a; +} + + +/* + * Call any of the clapack_xlaswp functions as directly as possible. + * + * Note that LAPACK's xlaswp functions accept a column-order matrix, but NMatrix uses row-order. Thus, n should be the + * number of rows and lda should be the number of columns, no matter what it says in the documentation for dlaswp.f. + */ +static VALUE nm_clapack_laswp(VALUE self, VALUE n, VALUE a, VALUE lda, VALUE k1, VALUE k2, VALUE ipiv, VALUE incx) { + static void (*ttable[nm::NUM_DTYPES])(const int n, void* a, const int lda, const int k1, const int k2, const int* ipiv, const int incx) = { + nm::math::clapack_laswp<uint8_t>, + nm::math::clapack_laswp<int8_t>, + nm::math::clapack_laswp<int16_t>, + nm::math::clapack_laswp<int32_t>, + nm::math::clapack_laswp<int64_t>, + nm::math::clapack_laswp<float>, + nm::math::clapack_laswp<double>, +//#ifdef HAVE_CLAPACK_H // laswp doesn't actually exist in clapack.h! +// clapack_claswp, clapack_zlaswp, // call directly, same function signature! +//#else // Especially important for Mac OS, which doesn't seem to include the ATLAS clapack interface. + nm::math::clapack_laswp<nm::Complex64>, + nm::math::clapack_laswp<nm::Complex128>, +//#endif + nm::math::clapack_laswp<nm::Rational32>, + nm::math::clapack_laswp<nm::Rational64>, + nm::math::clapack_laswp<nm::Rational128>, + nm::math::clapack_laswp<nm::RubyObject> + }; + + // Allocate the C version of the pivot index array + // TODO: Allow for an NVector here also, maybe? + int* ipiv_; + if (TYPE(ipiv) != T_ARRAY) { + rb_raise(rb_eArgError, "ipiv must be of type Array"); + } else { + ipiv_ = ALLOCA_N(int, RARRAY_LEN(ipiv)); + for (int index = 0; index < RARRAY_LEN(ipiv); ++index) { + ipiv_[index] = FIX2INT( RARRAY_PTR(ipiv)[index] ); + } + } + + // Call either our version of laswp or the LAPACK version. + ttable[NM_DTYPE(a)](FIX2INT(n), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), FIX2INT(k1), FIX2INT(k2), ipiv_, FIX2INT(incx)); + + // a is both returned and modified directly in the argument list. + return a; +} + + +/* * C accessor for calculating an exact determinant. */ -void nm_math_det_exact(const int M, const void* elements, const int lda, dtype_t dtype, void* result) { +void nm_math_det_exact(const int M, const void* elements, const int lda, nm::dtype_t dtype, void* result) { NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::det_exact, void, const int M, const void* A_elements, const int lda, void* result_arg); ttable[dtype](M, elements, lda, result); }