ext/nmatrix/util/math.cpp in nmatrix-0.0.4 vs ext/nmatrix/util/math.cpp in nmatrix-0.0.5
- old
+ new
@@ -125,10 +125,12 @@
extern "C" {
#ifdef HAVE_CLAPACK_H
#include <clapack.h>
#endif
+ static VALUE nm_cblas_nrm2(VALUE self, VALUE n, VALUE x, VALUE incx);
+ static VALUE nm_cblas_asum(VALUE self, VALUE n, VALUE x, VALUE incx);
static VALUE nm_cblas_rot(VALUE self, VALUE n, VALUE x, VALUE incx, VALUE y, VALUE incy, VALUE c, VALUE s);
static VALUE nm_cblas_rotg(VALUE self, VALUE ab);
static VALUE nm_cblas_gemm(VALUE self, VALUE order, VALUE trans_a, VALUE trans_b, VALUE m, VALUE n, VALUE k, VALUE vAlpha,
VALUE a, VALUE lda, VALUE b, VALUE ldb, VALUE vBeta, VALUE c, VALUE ldc);
@@ -305,10 +307,12 @@
rb_define_singleton_method(cNMatrix_LAPACK, "clapack_scal", (METHOD)nm_clapack_scal, 4);
rb_define_singleton_method(cNMatrix_LAPACK, "clapack_lauum", (METHOD)nm_clapack_lauum, 5);
cNMatrix_BLAS = rb_define_module_under(cNMatrix, "BLAS");
+ rb_define_singleton_method(cNMatrix_BLAS, "cblas_nrm2", (METHOD)nm_cblas_nrm2, 3);
+ rb_define_singleton_method(cNMatrix_BLAS, "cblas_asum", (METHOD)nm_cblas_asum, 3);
rb_define_singleton_method(cNMatrix_BLAS, "cblas_rot", (METHOD)nm_cblas_rot, 7);
rb_define_singleton_method(cNMatrix_BLAS, "cblas_rotg", (METHOD)nm_cblas_rotg, 1);
rb_define_singleton_method(cNMatrix_BLAS, "cblas_gemm", (METHOD)nm_cblas_gemm, 14);
rb_define_singleton_method(cNMatrix_BLAS, "cblas_gemv", (METHOD)nm_cblas_gemv, 11);
@@ -511,9 +515,117 @@
ttable[dtype](FIX2INT(n), NM_STORAGE_DENSE(x)->elements, FIX2INT(incx), NM_STORAGE_DENSE(y)->elements, FIX2INT(incy), pC, pS);
return Qtrue;
}
}
+
+
+/*
+ * Call any of the cblas_xnrm2 functions as directly as possible.
+ *
+ * xNRM2 is a BLAS level 1 routine which calculates the 2-norm of an n-vector x.
+ *
+ * Arguments:
+ * * n :: length of x, must be at least 0
+ * * x :: pointer to first entry of input vector
+ * * incx :: stride of x, must be POSITIVE (ATLAS says non-zero, but 3.8.4 code only allows positive)
+ *
+ * You probably don't want to call this function. Instead, why don't you try nrm2, which is more flexible
+ * with its arguments?
+ *
+ * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
+ * handling, so you can easily crash Ruby!
+ */
+static VALUE nm_cblas_nrm2(VALUE self, VALUE n, VALUE x, VALUE incx) {
+
+ static void (*ttable[nm::NUM_DTYPES])(const int N, const void* X, const int incX, void* sum) = {
+/* nm::math::cblas_nrm2<uint8_t,uint8_t>,
+ nm::math::cblas_nrm2<int8_t,int8_t>,
+ nm::math::cblas_nrm2<int16_t,int16_t>,
+ nm::math::cblas_nrm2<int32_t,int32_t>, */
+ NULL, NULL, NULL, NULL, NULL, // no help for integers
+ nm::math::cblas_nrm2<float32_t,float32_t>,
+ nm::math::cblas_nrm2<float64_t,float64_t>,
+ nm::math::cblas_nrm2<float32_t,nm::Complex64>,
+ nm::math::cblas_nrm2<float64_t,nm::Complex128>,
+ nm::math::cblas_nrm2<nm::Rational32,nm::Rational32>,
+ nm::math::cblas_nrm2<nm::Rational64,nm::Rational64>,
+ nm::math::cblas_nrm2<nm::Rational128,nm::Rational128>,
+ nm::math::cblas_nrm2<nm::RubyObject,nm::RubyObject>
+ };
+
+ nm::dtype_t dtype = NM_DTYPE(x);
+
+ if (!ttable[dtype]) {
+ rb_raise(nm_eDataTypeError, "this vector operation undefined for integer vectors");
+ return Qnil;
+
+ } else {
+ // Determine the return dtype and allocate it
+ nm::dtype_t rdtype = dtype;
+ if (dtype == nm::COMPLEX64) rdtype = nm::FLOAT32;
+ else if (dtype == nm::COMPLEX128) rdtype = nm::FLOAT64;
+
+ void *Result = ALLOCA_N(char, DTYPE_SIZES[rdtype]);
+
+ ttable[dtype](FIX2INT(n), NM_STORAGE_DENSE(x)->elements, FIX2INT(incx), Result);
+
+ return rubyobj_from_cval(Result, rdtype).rval;
+ }
+}
+
+
+
+/*
+ * Call any of the cblas_xasum functions as directly as possible.
+ *
+ * xASUM is a BLAS level 1 routine which calculates the sum of absolute values of the entries
+ * of a vector x.
+ *
+ * Arguments:
+ * * n :: length of x, must be at least 0
+ * * x :: pointer to first entry of input vector
+ * * incx :: stride of x, must be POSITIVE (ATLAS says non-zero, but 3.8.4 code only allows positive)
+ *
+ * You probably don't want to call this function. Instead, why don't you try asum, which is more flexible
+ * with its arguments?
+ *
+ * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
+ * handling, so you can easily crash Ruby!
+ */
+static VALUE nm_cblas_asum(VALUE self, VALUE n, VALUE x, VALUE incx) {
+
+ static void (*ttable[nm::NUM_DTYPES])(const int N, const void* X, const int incX, void* sum) = {
+ nm::math::cblas_asum<uint8_t,uint8_t>,
+ nm::math::cblas_asum<int8_t,int8_t>,
+ nm::math::cblas_asum<int16_t,int16_t>,
+ nm::math::cblas_asum<int32_t,int32_t>,
+ nm::math::cblas_asum<int64_t,int64_t>,
+ nm::math::cblas_asum<float32_t,float32_t>,
+ nm::math::cblas_asum<float64_t,float64_t>,
+ nm::math::cblas_asum<float32_t,nm::Complex64>,
+ nm::math::cblas_asum<float64_t,nm::Complex128>,
+ nm::math::cblas_asum<nm::Rational32,nm::Rational32>,
+ nm::math::cblas_asum<nm::Rational64,nm::Rational64>,
+ nm::math::cblas_asum<nm::Rational128,nm::Rational128>,
+ nm::math::cblas_asum<nm::RubyObject,nm::RubyObject>
+ };
+
+ nm::dtype_t dtype = NM_DTYPE(x);
+
+ // Determine the return dtype and allocate it
+ nm::dtype_t rdtype = dtype;
+ if (dtype == nm::COMPLEX64) rdtype = nm::FLOAT32;
+ else if (dtype == nm::COMPLEX128) rdtype = nm::FLOAT64;
+
+ void *Result = ALLOCA_N(char, DTYPE_SIZES[rdtype]);
+
+ ttable[dtype](FIX2INT(n), NM_STORAGE_DENSE(x)->elements, FIX2INT(incx), Result);
+
+ return rubyobj_from_cval(Result, rdtype).rval;
+}
+
+
/* Call any of the cblas_xgemm functions as directly as possible.
*
* The cblas_xgemm functions (dgemm, sgemm, cgemm, and zgemm) define the following operation: