ext/nmatrix/util/math.cpp in nmatrix-0.0.4 vs ext/nmatrix/util/math.cpp in nmatrix-0.0.5

- old
+ new

@@ -125,10 +125,12 @@ extern "C" { #ifdef HAVE_CLAPACK_H #include <clapack.h> #endif + static VALUE nm_cblas_nrm2(VALUE self, VALUE n, VALUE x, VALUE incx); + static VALUE nm_cblas_asum(VALUE self, VALUE n, VALUE x, VALUE incx); static VALUE nm_cblas_rot(VALUE self, VALUE n, VALUE x, VALUE incx, VALUE y, VALUE incy, VALUE c, VALUE s); static VALUE nm_cblas_rotg(VALUE self, VALUE ab); static VALUE nm_cblas_gemm(VALUE self, VALUE order, VALUE trans_a, VALUE trans_b, VALUE m, VALUE n, VALUE k, VALUE vAlpha, VALUE a, VALUE lda, VALUE b, VALUE ldb, VALUE vBeta, VALUE c, VALUE ldc); @@ -305,10 +307,12 @@ rb_define_singleton_method(cNMatrix_LAPACK, "clapack_scal", (METHOD)nm_clapack_scal, 4); rb_define_singleton_method(cNMatrix_LAPACK, "clapack_lauum", (METHOD)nm_clapack_lauum, 5); cNMatrix_BLAS = rb_define_module_under(cNMatrix, "BLAS"); + rb_define_singleton_method(cNMatrix_BLAS, "cblas_nrm2", (METHOD)nm_cblas_nrm2, 3); + rb_define_singleton_method(cNMatrix_BLAS, "cblas_asum", (METHOD)nm_cblas_asum, 3); rb_define_singleton_method(cNMatrix_BLAS, "cblas_rot", (METHOD)nm_cblas_rot, 7); rb_define_singleton_method(cNMatrix_BLAS, "cblas_rotg", (METHOD)nm_cblas_rotg, 1); rb_define_singleton_method(cNMatrix_BLAS, "cblas_gemm", (METHOD)nm_cblas_gemm, 14); rb_define_singleton_method(cNMatrix_BLAS, "cblas_gemv", (METHOD)nm_cblas_gemv, 11); @@ -511,9 +515,117 @@ ttable[dtype](FIX2INT(n), NM_STORAGE_DENSE(x)->elements, FIX2INT(incx), NM_STORAGE_DENSE(y)->elements, FIX2INT(incy), pC, pS); return Qtrue; } } + + +/* + * Call any of the cblas_xnrm2 functions as directly as possible. + * + * xNRM2 is a BLAS level 1 routine which calculates the 2-norm of an n-vector x. + * + * Arguments: + * * n :: length of x, must be at least 0 + * * x :: pointer to first entry of input vector + * * incx :: stride of x, must be POSITIVE (ATLAS says non-zero, but 3.8.4 code only allows positive) + * + * You probably don't want to call this function. Instead, why don't you try nrm2, which is more flexible + * with its arguments? + * + * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception + * handling, so you can easily crash Ruby! + */ +static VALUE nm_cblas_nrm2(VALUE self, VALUE n, VALUE x, VALUE incx) { + + static void (*ttable[nm::NUM_DTYPES])(const int N, const void* X, const int incX, void* sum) = { +/* nm::math::cblas_nrm2<uint8_t,uint8_t>, + nm::math::cblas_nrm2<int8_t,int8_t>, + nm::math::cblas_nrm2<int16_t,int16_t>, + nm::math::cblas_nrm2<int32_t,int32_t>, */ + NULL, NULL, NULL, NULL, NULL, // no help for integers + nm::math::cblas_nrm2<float32_t,float32_t>, + nm::math::cblas_nrm2<float64_t,float64_t>, + nm::math::cblas_nrm2<float32_t,nm::Complex64>, + nm::math::cblas_nrm2<float64_t,nm::Complex128>, + nm::math::cblas_nrm2<nm::Rational32,nm::Rational32>, + nm::math::cblas_nrm2<nm::Rational64,nm::Rational64>, + nm::math::cblas_nrm2<nm::Rational128,nm::Rational128>, + nm::math::cblas_nrm2<nm::RubyObject,nm::RubyObject> + }; + + nm::dtype_t dtype = NM_DTYPE(x); + + if (!ttable[dtype]) { + rb_raise(nm_eDataTypeError, "this vector operation undefined for integer vectors"); + return Qnil; + + } else { + // Determine the return dtype and allocate it + nm::dtype_t rdtype = dtype; + if (dtype == nm::COMPLEX64) rdtype = nm::FLOAT32; + else if (dtype == nm::COMPLEX128) rdtype = nm::FLOAT64; + + void *Result = ALLOCA_N(char, DTYPE_SIZES[rdtype]); + + ttable[dtype](FIX2INT(n), NM_STORAGE_DENSE(x)->elements, FIX2INT(incx), Result); + + return rubyobj_from_cval(Result, rdtype).rval; + } +} + + + +/* + * Call any of the cblas_xasum functions as directly as possible. + * + * xASUM is a BLAS level 1 routine which calculates the sum of absolute values of the entries + * of a vector x. + * + * Arguments: + * * n :: length of x, must be at least 0 + * * x :: pointer to first entry of input vector + * * incx :: stride of x, must be POSITIVE (ATLAS says non-zero, but 3.8.4 code only allows positive) + * + * You probably don't want to call this function. Instead, why don't you try asum, which is more flexible + * with its arguments? + * + * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception + * handling, so you can easily crash Ruby! + */ +static VALUE nm_cblas_asum(VALUE self, VALUE n, VALUE x, VALUE incx) { + + static void (*ttable[nm::NUM_DTYPES])(const int N, const void* X, const int incX, void* sum) = { + nm::math::cblas_asum<uint8_t,uint8_t>, + nm::math::cblas_asum<int8_t,int8_t>, + nm::math::cblas_asum<int16_t,int16_t>, + nm::math::cblas_asum<int32_t,int32_t>, + nm::math::cblas_asum<int64_t,int64_t>, + nm::math::cblas_asum<float32_t,float32_t>, + nm::math::cblas_asum<float64_t,float64_t>, + nm::math::cblas_asum<float32_t,nm::Complex64>, + nm::math::cblas_asum<float64_t,nm::Complex128>, + nm::math::cblas_asum<nm::Rational32,nm::Rational32>, + nm::math::cblas_asum<nm::Rational64,nm::Rational64>, + nm::math::cblas_asum<nm::Rational128,nm::Rational128>, + nm::math::cblas_asum<nm::RubyObject,nm::RubyObject> + }; + + nm::dtype_t dtype = NM_DTYPE(x); + + // Determine the return dtype and allocate it + nm::dtype_t rdtype = dtype; + if (dtype == nm::COMPLEX64) rdtype = nm::FLOAT32; + else if (dtype == nm::COMPLEX128) rdtype = nm::FLOAT64; + + void *Result = ALLOCA_N(char, DTYPE_SIZES[rdtype]); + + ttable[dtype](FIX2INT(n), NM_STORAGE_DENSE(x)->elements, FIX2INT(incx), Result); + + return rubyobj_from_cval(Result, rdtype).rval; +} + + /* Call any of the cblas_xgemm functions as directly as possible. * * The cblas_xgemm functions (dgemm, sgemm, cgemm, and zgemm) define the following operation: