math.cpp in nmatrix-0.0.3

- old
+ new

@@ -125,22 +125,35 @@
 extern "C" {
 #ifdef HAVE_CLAPACK_H
   #include <clapack.h>
 #endif
 
+  static VALUE nm_cblas_rot(VALUE self, VALUE n, VALUE x, VALUE incx, VALUE y, VALUE incy, VALUE c, VALUE s);
+  static VALUE nm_cblas_rotg(VALUE self, VALUE ab);
+
   static VALUE nm_cblas_gemm(VALUE self, VALUE order, VALUE trans_a, VALUE trans_b, VALUE m, VALUE n, VALUE k, VALUE vAlpha,
                              VALUE a, VALUE lda, VALUE b, VALUE ldb, VALUE vBeta, VALUE c, VALUE ldc);
-
   static VALUE nm_cblas_gemv(VALUE self, VALUE trans_a, VALUE m, VALUE n, VALUE vAlpha, VALUE a, VALUE lda,
                              VALUE x, VALUE incx, VALUE vBeta, VALUE y, VALUE incy);
-
   static VALUE nm_cblas_trsm(VALUE self, VALUE order, VALUE side, VALUE uplo, VALUE trans_a, VALUE diag, VALUE m, VALUE n,
                              VALUE vAlpha, VALUE a, VALUE lda, VALUE b, VALUE ldb);
+  static VALUE nm_cblas_trmm(VALUE self, VALUE order, VALUE side, VALUE uplo, VALUE trans_a, VALUE diag, VALUE m, VALUE n,
+                             VALUE alpha, VALUE a, VALUE lda, VALUE b, VALUE ldb);
+  static VALUE nm_cblas_herk(VALUE self, VALUE order, VALUE uplo, VALUE trans, VALUE n, VALUE k, VALUE alpha, VALUE a,
+                             VALUE lda, VALUE beta, VALUE c, VALUE ldc);
+  static VALUE nm_cblas_syrk(VALUE self, VALUE order, VALUE uplo, VALUE trans, VALUE n, VALUE k, VALUE alpha, VALUE a,
+                             VALUE lda, VALUE beta, VALUE c, VALUE ldc);
 
   static VALUE nm_clapack_getrf(VALUE self, VALUE order, VALUE m, VALUE n, VALUE a, VALUE lda);
-
+  static VALUE nm_clapack_potrf(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE a, VALUE lda);
+  static VALUE nm_clapack_getrs(VALUE self, VALUE order, VALUE trans, VALUE n, VALUE nrhs, VALUE a, VALUE lda, VALUE ipiv, VALUE b, VALUE ldb);
+  static VALUE nm_clapack_potrs(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE nrhs, VALUE a, VALUE lda, VALUE b, VALUE ldb);
+  static VALUE nm_clapack_getri(VALUE self, VALUE order, VALUE n, VALUE a, VALUE lda, VALUE ipiv);
+  static VALUE nm_clapack_potri(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE a, VALUE lda);
+  static VALUE nm_clapack_laswp(VALUE self, VALUE n, VALUE a, VALUE lda, VALUE k1, VALUE k2, VALUE ipiv, VALUE incx);
   static VALUE nm_clapack_scal(VALUE self, VALUE n, VALUE scale, VALUE vector, VALUE incx);
+  static VALUE nm_clapack_lauum(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE a, VALUE lda);
 
 } // end of extern "C" block
 
 ////////////////////
 // Math Functions //
@@ -236,11 +249,42 @@
   trsm<DType>(order, side, uplo, trans_a, diag, m, n, *reinterpret_cast<const DType*>(alpha),
               reinterpret_cast<const DType*>(a), lda, reinterpret_cast<DType*>(b), ldb);
 }
 
 
+/*
+ * Function signature conversion for calling CBLAS' trmm functions as directly as possible.
+ *
+ * For documentation: http://www.netlib.org/blas/dtrmm.f
+ */
+template <typename DType>
+inline static void cblas_trmm(const enum CBLAS_ORDER order, const enum CBLAS_SIDE side, const enum CBLAS_UPLO uplo,
+                              const enum CBLAS_TRANSPOSE ta, const enum CBLAS_DIAG diag, const int m, const int n, const void* alpha,
+                              const void* A, const int lda, void* B, const int ldb)
+{
+  trmm<DType>(order, side, uplo, ta, diag, m, n, reinterpret_cast<const DType*>(alpha),
+              reinterpret_cast<const DType*>(A), lda, reinterpret_cast<DType*>(B), ldb);
+}
 
+
+/*
+ * Function signature conversion for calling CBLAS' syrk functions as directly as possible.
+ *
+ * For documentation: http://www.netlib.org/blas/dsyrk.f
+ */
+template <typename DType>
+inline static void cblas_syrk(const enum CBLAS_ORDER order, const enum CBLAS_UPLO uplo, const enum CBLAS_TRANSPOSE trans,
+                              const int n, const int k, const void* alpha,
+                              const void* A, const int lda, const void* beta, void* C, const int ldc)
+{
+  syrk<DType>(order, uplo, trans, n, k, reinterpret_cast<const DType*>(alpha),
+              reinterpret_cast<const DType*>(A), lda, reinterpret_cast<const DType*>(beta), reinterpret_cast<DType*>(C), ldc);
+}
+
+
+
+
 }} // end of namespace nm::math
 
 
 extern "C" {
 
@@ -250,17 +294,30 @@
 
 void nm_math_init_blas() {
 	cNMatrix_LAPACK = rb_define_module_under(cNMatrix, "LAPACK");
 
   rb_define_singleton_method(cNMatrix_LAPACK, "clapack_getrf", (METHOD)nm_clapack_getrf, 5);
-  rb_define_singleton_method(cNMatrix_LAPACK, "clapack_scal", (METHOD)nm_clapack_scal, 4);
+  rb_define_singleton_method(cNMatrix_LAPACK, "clapack_potrf", (METHOD)nm_clapack_potrf, 5);
+  rb_define_singleton_method(cNMatrix_LAPACK, "clapack_getrs", (METHOD)nm_clapack_getrs, 9);
+  rb_define_singleton_method(cNMatrix_LAPACK, "clapack_potrs", (METHOD)nm_clapack_potrs, 8);
+  rb_define_singleton_method(cNMatrix_LAPACK, "clapack_getri", (METHOD)nm_clapack_getri, 5);
+  rb_define_singleton_method(cNMatrix_LAPACK, "clapack_potri", (METHOD)nm_clapack_potri, 5);
+  rb_define_singleton_method(cNMatrix_LAPACK, "clapack_laswp", (METHOD)nm_clapack_laswp, 7);
+  rb_define_singleton_method(cNMatrix_LAPACK, "clapack_scal",  (METHOD)nm_clapack_scal,  4);
+  rb_define_singleton_method(cNMatrix_LAPACK, "clapack_lauum", (METHOD)nm_clapack_lauum, 5);
 
   cNMatrix_BLAS = rb_define_module_under(cNMatrix, "BLAS");
 
+  rb_define_singleton_method(cNMatrix_BLAS, "cblas_rot",  (METHOD)nm_cblas_rot,  7);
+  rb_define_singleton_method(cNMatrix_BLAS, "cblas_rotg", (METHOD)nm_cblas_rotg, 1);
+
 	rb_define_singleton_method(cNMatrix_BLAS, "cblas_gemm", (METHOD)nm_cblas_gemm, 14);
 	rb_define_singleton_method(cNMatrix_BLAS, "cblas_gemv", (METHOD)nm_cblas_gemv, 11);
 	rb_define_singleton_method(cNMatrix_BLAS, "cblas_trsm", (METHOD)nm_cblas_trsm, 12);
+	rb_define_singleton_method(cNMatrix_BLAS, "cblas_trmm", (METHOD)nm_cblas_trmm, 12);
+	rb_define_singleton_method(cNMatrix_BLAS, "cblas_syrk", (METHOD)nm_cblas_syrk, 11);
+	rb_define_singleton_method(cNMatrix_BLAS, "cblas_herk", (METHOD)nm_cblas_herk, 11);
 }
 
 
 /* Interprets cblas argument which could be any of false/:no_transpose, :transpose, or :complex_conjugate,
  * into an enum recognized by cblas.
@@ -323,10 +380,143 @@
   rb_raise(rb_eArgError, "Expected :row or :col for order argument");
   return CblasRowMajor;
 }
 
 
+/*
+ * Call any of the cblas_xrotg functions as directly as possible.
+ *
+ * xROTG computes the elements of a Givens plane rotation matrix such that:
+ *
+ *  |  c s |   | a |   | r |
+ *  | -s c | * | b | = | 0 |
+ *
+ * where r = +- sqrt( a**2 + b**2 ) and c**2 + s**2 = 1.
+ *
+ * The Givens plane rotation can be used to introduce zero elements into a matrix selectively.
+ *
+ * This function differs from most of the other raw BLAS accessors. Instead of providing a, b, c, s as arguments, you
+ * should only provide a and b (the inputs), and you should provide them as a single NVector (or the first two elements
+ * of any dense NMatrix or NVector type, specifically).
+ *
+ * The outputs [c,s] will be returned in a Ruby Array at the end; the input NVector will also be modified in-place.
+ *
+ * If you provide rationals, be aware that there's a high probability of an error, since rotg includes a square root --
+ * and most rationals' square roots are irrational. You're better off converting to Float first.
+ *
+ * This function, like the other cblas_ functions, does minimal type-checking.
+ */
+static VALUE nm_cblas_rotg(VALUE self, VALUE ab) {
+  static void (*ttable[nm::NUM_DTYPES])(void* a, void* b, void* c, void* s) = {
+      NULL, NULL, NULL, NULL, NULL, // can't represent c and s as integers, so no point in having integer operations.
+      nm::math::cblas_rotg<float>,
+      nm::math::cblas_rotg<double>,
+      nm::math::cblas_rotg<nm::Complex64>,
+      nm::math::cblas_rotg<nm::Complex128>,
+      nm::math::cblas_rotg<nm::Rational32>,
+      nm::math::cblas_rotg<nm::Rational64>,
+      nm::math::cblas_rotg<nm::Rational128>,
+      nm::math::cblas_rotg<nm::RubyObject>
+  };
+
+  nm::dtype_t dtype = NM_DTYPE(ab);
+
+  if (!ttable[dtype]) {
+    rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
+    return Qnil;
+
+  } else {
+    void *pC = ALLOCA_N(char, DTYPE_SIZES[dtype]),
+         *pS = ALLOCA_N(char, DTYPE_SIZES[dtype]);
+
+    // extract A and B from the NVector (first two elements)
+    void* pA = NM_STORAGE_DENSE(ab)->elements;
+    void* pB = (char*)(NM_STORAGE_DENSE(ab)->elements) + DTYPE_SIZES[dtype];
+    // c and s are output
+
+    ttable[dtype](pA, pB, pC, pS);
+
+    VALUE result = rb_ary_new2(2);
+    rb_ary_store(result, 0, rubyobj_from_cval(pC, dtype).rval);
+    rb_ary_store(result, 1, rubyobj_from_cval(pS, dtype).rval);
+
+    return result;
+  }
+}
+
+
+/*
+ * Call any of the cblas_xrot functions as directly as possible.
+ *
+ * xROT is a BLAS level 1 routine (taking two vectors) which applies a plane rotation.
+ *
+ * It's tough to find documentation on xROT. Here are what we think the arguments are for:
+ *  * n     :: number of elements to consider in x and y
+ *  * x     :: a vector (expects an NVector)
+ *  * incx  :: stride of x
+ *  * y     :: a vector (expects an NVector)
+ *  * incy  :: stride of y
+ *  * c     :: cosine of the angle of rotation
+ *  * s     :: sine of the angle of rotation
+ *
+ * Note that c and s will be the same dtype as x and y, except when x and y are complex. If x and y are complex, c and s
+ * will be float for Complex64 or double for Complex128.
+ *
+ * You probably don't want to call this function. Instead, why don't you try rot, which is more flexible
+ * with its arguments?
+ *
+ * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
+ * handling, so you can easily crash Ruby!
+ */
+static VALUE nm_cblas_rot(VALUE self, VALUE n, VALUE x, VALUE incx, VALUE y, VALUE incy, VALUE c, VALUE s) {
+  static void (*ttable[nm::NUM_DTYPES])(const int N, void*, const int, void*, const int, const void*, const void*) = {
+      NULL, NULL, NULL, NULL, NULL, // can't represent c and s as integers, so no point in having integer operations.
+      nm::math::cblas_rot<float,float>,
+      nm::math::cblas_rot<double,double>,
+      nm::math::cblas_rot<nm::Complex64,float>,
+      nm::math::cblas_rot<nm::Complex128,double>,
+      nm::math::cblas_rot<nm::Rational32,nm::Rational32>,
+      nm::math::cblas_rot<nm::Rational64,nm::Rational64>,
+      nm::math::cblas_rot<nm::Rational128,nm::Rational128>,
+      nm::math::cblas_rot<nm::RubyObject,nm::RubyObject>
+  };
+
+  nm::dtype_t dtype = NM_DTYPE(x);
+
+
+  if (!ttable[dtype]) {
+    rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
+    return Qfalse;
+  } else {
+    void *pC, *pS;
+
+    // We need to ensure the cosine and sine arguments are the correct dtype -- which may differ from the actual dtype.
+    if (dtype == nm::COMPLEX64) {
+      pC = ALLOCA_N(float,1);
+      pS = ALLOCA_N(float,1);
+      rubyval_to_cval(c, nm::FLOAT32, pC);
+      rubyval_to_cval(s, nm::FLOAT32, pS);
+    } else if (dtype == nm::COMPLEX128) {
+      pC = ALLOCA_N(double,1);
+      pS = ALLOCA_N(double,1);
+      rubyval_to_cval(c, nm::FLOAT64, pC);
+      rubyval_to_cval(s, nm::FLOAT64, pS);
+    } else {
+      pC = ALLOCA_N(char, DTYPE_SIZES[dtype]);
+      pS = ALLOCA_N(char, DTYPE_SIZES[dtype]);
+      rubyval_to_cval(c, dtype, pC);
+      rubyval_to_cval(s, dtype, pS);
+    }
+
+
+    ttable[dtype](FIX2INT(n), NM_STORAGE_DENSE(x)->elements, FIX2INT(incx), NM_STORAGE_DENSE(y)->elements, FIX2INT(incy), pC, pS);
+
+    return Qtrue;
+  }
+}
+
+
 /* Call any of the cblas_xgemm functions as directly as possible.
  *
  * The cblas_xgemm functions (dgemm, sgemm, cgemm, and zgemm) define the following operation:
  *
  *    C = alpha*op(A)*op(B) + beta*C
@@ -338,11 +528,11 @@
  * expose the ultra-optimized ATLAS versions.
  *
  * == Arguments
  * See: http://www.netlib.org/blas/dgemm.f
  *
- * You probably don't want to call this function. Instead, why don't you try cblas_gemm, which is more flexible
+ * You probably don't want to call this function. Instead, why don't you try gemm, which is more flexible
  * with its arguments?
  *
  * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
  * handling, so you can easily crash Ruby!
  */
@@ -356,11 +546,11 @@
                            VALUE beta,
                            VALUE c, VALUE ldc)
 {
   NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::cblas_gemm, void, const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE trans_a, const enum CBLAS_TRANSPOSE trans_b, int m, int n, int k, void* alpha, void* a, int lda, void* b, int ldb, void* beta, void* c, int ldc);
 
-  dtype_t dtype = NM_DTYPE(a);
+  nm::dtype_t dtype = NM_DTYPE(a);
 
   void *pAlpha = ALLOCA_N(char, DTYPE_SIZES[dtype]),
        *pBeta  = ALLOCA_N(char, DTYPE_SIZES[dtype]);
   rubyval_to_cval(alpha, dtype, pAlpha);
   rubyval_to_cval(beta, dtype, pBeta);
@@ -401,11 +591,11 @@
                            VALUE beta,
                            VALUE y, VALUE incy)
 {
   NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::cblas_gemv, bool, const enum CBLAS_TRANSPOSE trans_a, int m, int n, void* alpha, void* a, int lda, void* x, int incx, void* beta, void* y, int incy);
 
-  dtype_t dtype = NM_DTYPE(a);
+  nm::dtype_t dtype = NM_DTYPE(a);
 
   void *pAlpha = ALLOCA_N(char, DTYPE_SIZES[dtype]),
        *pBeta  = ALLOCA_N(char, DTYPE_SIZES[dtype]);
   rubyval_to_cval(alpha, dtype, pAlpha);
   rubyval_to_cval(beta, dtype, pBeta);
@@ -423,11 +613,11 @@
                            VALUE a, VALUE lda,
                            VALUE b, VALUE ldb)
 {
   static void (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const enum CBLAS_SIDE, const enum CBLAS_UPLO,
                                         const enum CBLAS_TRANSPOSE, const enum CBLAS_DIAG,
-                                        const int, const int, const void* alpha, const void* a,
+                                        const int m, const int n, const void* alpha, const void* a,
                                         const int lda, void* b, const int ldb) = {
       NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
       nm::math::cblas_trsm<float>,
       nm::math::cblas_trsm<double>,
       cblas_ctrsm, cblas_ztrsm, // call directly, same function signature!
@@ -435,28 +625,137 @@
       nm::math::cblas_trsm<nm::Rational64>,
       nm::math::cblas_trsm<nm::Rational128>,
       nm::math::cblas_trsm<nm::RubyObject>
   };
 
-  dtype_t dtype = NM_DTYPE(a);
+  nm::dtype_t dtype = NM_DTYPE(a);
 
-  void *pAlpha = ALLOCA_N(char, DTYPE_SIZES[dtype]);
-  rubyval_to_cval(alpha, dtype, pAlpha);
+  if (!ttable[dtype]) {
+    rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
+  } else {
+    void *pAlpha = ALLOCA_N(char, DTYPE_SIZES[dtype]);
+    rubyval_to_cval(alpha, dtype, pAlpha);
 
-  ttable[dtype](blas_order_sym(order), blas_side_sym(side), blas_uplo_sym(uplo), blas_transpose_sym(trans_a), blas_diag_sym(diag), FIX2INT(m), FIX2INT(n), pAlpha, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NM_STORAGE_DENSE(b)->elements, FIX2INT(ldb));
+    ttable[dtype](blas_order_sym(order), blas_side_sym(side), blas_uplo_sym(uplo), blas_transpose_sym(trans_a), blas_diag_sym(diag), FIX2INT(m), FIX2INT(n), pAlpha, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NM_STORAGE_DENSE(b)->elements, FIX2INT(ldb));
+  }
 
   return Qtrue;
 }
 
 
+static VALUE nm_cblas_trmm(VALUE self,
+                           VALUE order,
+                           VALUE side, VALUE uplo,
+                           VALUE trans_a, VALUE diag,
+                           VALUE m, VALUE n,
+                           VALUE alpha,
+                           VALUE a, VALUE lda,
+                           VALUE b, VALUE ldb)
+{
+  static void (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER,
+                                        const enum CBLAS_SIDE, const enum CBLAS_UPLO,
+                                        const enum CBLAS_TRANSPOSE, const enum CBLAS_DIAG,
+                                        const int m, const int n, const void* alpha, const void* a,
+                                        const int lda, void* b, const int ldb) = {
+      NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
+      nm::math::cblas_trmm<float>,
+      nm::math::cblas_trmm<double>,
+      cblas_ctrmm, cblas_ztrmm // call directly, same function signature!
+      /*
+      nm::math::cblas_trmm<nm::Rational32>,
+      nm::math::cblas_trmm<nm::Rational64>,
+      nm::math::cblas_trmm<nm::Rational128>,
+      nm::math::cblas_trmm<nm::RubyObject>*/
+  };
+
+  nm::dtype_t dtype = NM_DTYPE(a);
+
+  if (!ttable[dtype]) {
+    rb_raise(nm_eDataTypeError, "this matrix operation not yet defined for non-BLAS dtypes");
+  } else {
+    void *pAlpha = ALLOCA_N(char, DTYPE_SIZES[dtype]);
+    rubyval_to_cval(alpha, dtype, pAlpha);
+
+    ttable[dtype](blas_order_sym(order), blas_side_sym(side), blas_uplo_sym(uplo), blas_transpose_sym(trans_a), blas_diag_sym(diag), FIX2INT(m), FIX2INT(n), pAlpha, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NM_STORAGE_DENSE(b)->elements, FIX2INT(ldb));
+  }
+
+  return b;
+}
+
+
+static VALUE nm_cblas_syrk(VALUE self,
+                           VALUE order,
+                           VALUE uplo,
+                           VALUE trans,
+                           VALUE n, VALUE k,
+                           VALUE alpha,
+                           VALUE a, VALUE lda,
+                           VALUE beta,
+                           VALUE c, VALUE ldc)
+{
+  static void (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const enum CBLAS_UPLO, const enum CBLAS_TRANSPOSE,
+                                        const int n, const int k, const void* alpha, const void* a,
+                                        const int lda, const void* beta, void* c, const int ldc) = {
+      NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
+      nm::math::cblas_syrk<float>,
+      nm::math::cblas_syrk<double>,
+      cblas_csyrk, cblas_zsyrk// call directly, same function signature!
+      /*nm::math::cblas_trsm<nm::Rational32>,
+      nm::math::cblas_trsm<nm::Rational64>,
+      nm::math::cblas_trsm<nm::Rational128>,
+      nm::math::cblas_trsm<nm::RubyObject>*/
+  };
+
+  nm::dtype_t dtype = NM_DTYPE(a);
+
+  if (!ttable[dtype]) {
+    rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
+  } else {
+    void *pAlpha = ALLOCA_N(char, DTYPE_SIZES[dtype]),
+         *pBeta = ALLOCA_N(char, DTYPE_SIZES[dtype]);
+    rubyval_to_cval(alpha, dtype, pAlpha);
+    rubyval_to_cval(beta, dtype, pBeta);
+
+    ttable[dtype](blas_order_sym(order), blas_uplo_sym(uplo), blas_transpose_sym(trans), FIX2INT(n), FIX2INT(k), pAlpha, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), pBeta, NM_STORAGE_DENSE(c)->elements, FIX2INT(ldc));
+  }
+
+  return Qtrue;
+}
+
+
+static VALUE nm_cblas_herk(VALUE self,
+                           VALUE order,
+                           VALUE uplo,
+                           VALUE trans,
+                           VALUE n, VALUE k,
+                           VALUE alpha,
+                           VALUE a, VALUE lda,
+                           VALUE beta,
+                           VALUE c, VALUE ldc)
+{
+
+  nm::dtype_t dtype = NM_DTYPE(a);
+
+  if (dtype == nm::COMPLEX64) {
+    cblas_cherk(blas_order_sym(order), blas_uplo_sym(uplo), blas_transpose_sym(trans), FIX2INT(n), FIX2INT(k), NUM2DBL(alpha), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NUM2DBL(beta), NM_STORAGE_DENSE(c)->elements, FIX2INT(ldc));
+  } else if (dtype == nm::COMPLEX128) {
+    cblas_zherk(blas_order_sym(order), blas_uplo_sym(uplo), blas_transpose_sym(trans), FIX2INT(n), FIX2INT(k), NUM2DBL(alpha), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NUM2DBL(beta), NM_STORAGE_DENSE(c)->elements, FIX2INT(ldc));
+  } else
+    rb_raise(rb_eNotImpError, "this matrix operation undefined for non-complex dtypes");
+
+
+  return Qtrue;
+}
+
+
 /*
  * Based on LAPACK's dscal function, but for any dtype.
  *
  * In-place modification; returns the modified vector as well.
  */
 static VALUE nm_clapack_scal(VALUE self, VALUE n, VALUE scale, VALUE vector, VALUE incx) {
-  dtype_t dtype = NM_DTYPE(vector);
+  nm::dtype_t dtype = NM_DTYPE(vector);
 
   void* da      = ALLOCA_N(char, DTYPE_SIZES[dtype]);
   rubyval_to_cval(scale, dtype, da);
 
   NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::clapack_scal, void, const int n, const void* da, void* dx, const int incx);
@@ -465,11 +764,47 @@
 
   return vector;
 }
 
 
-/* Call any of the clpack_xgetrf functions as directly as possible.
+static VALUE nm_clapack_lauum(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE a, VALUE lda) {
+  static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const enum CBLAS_UPLO, const int n, void* a, const int lda) = {
+      /*nm::math::clapack_lauum<uint8_t, false>,
+      nm::math::clapack_lauum<int8_t, false>,
+      nm::math::clapack_lauum<int16_t, false>,
+      nm::math::clapack_lauum<uint32_t, false>,
+      nm::math::clapack_lauum<uint64_t, false>,*/
+      NULL, NULL, NULL, NULL, NULL,
+      nm::math::clapack_lauum<false, float>,
+      nm::math::clapack_lauum<false, double>,
+#ifdef HAVE_CLAPACK_H
+      clapack_clauum, clapack_zlauum, // call directly, same function signature!
+#else // Especially important for Mac OS, which doesn't seem to include the ATLAS clapack interface.
+      nm::math::clapack_lauum<true, nm::Complex64>,
+      nm::math::clapack_lauum<true, nm::Complex128>,
+#endif
+/*
+      nm::math::clapack_lauum<nm::Rational32, false>,
+      nm::math::clapack_lauum<nm::Rational64, false>,
+      nm::math::clapack_lauum<nm::Rational128, false>,
+      nm::math::clapack_lauum<nm::RubyObject, false>
+
+*/
+  };
+
+  if (!ttable[NM_DTYPE(a)]) {
+    rb_raise(rb_eNotImpError, "does not yet work for non-BLAS dtypes (needs herk, syrk, trmm)");
+  } else {
+    // Call either our version of lauum or the LAPACK version.
+    ttable[NM_DTYPE(a)](blas_order_sym(order), blas_uplo_sym(uplo), FIX2INT(n), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda));
+  }
+
+  return a;
+}
+
+
+/* Call any of the clapack_xgetrf functions as directly as possible.
  *
  * The clapack_getrf functions (dgetrf, sgetrf, cgetrf, and zgetrf) compute an LU factorization of a general M-by-N
  * matrix A using partial pivoting with row interchanges.
  *
  * The factorization has the form:
@@ -513,12 +848,16 @@
 
   // Allocate the pivot index array, which is of size MIN(M, N).
   size_t ipiv_size = std::min(M,N);
   int* ipiv = ALLOCA_N(int, ipiv_size);
 
-  // Call either our version of getrf or the LAPACK version.
-  ttable[NM_DTYPE(a)](blas_order_sym(order), M, N, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), ipiv);
+  if (!ttable[NM_DTYPE(a)]) {
+    rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
+  } else {
+    // Call either our version of getrf or the LAPACK version.
+    ttable[NM_DTYPE(a)](blas_order_sym(order), M, N, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), ipiv);
+  }
 
   // Result will be stored in a. We return ipiv as an array.
   VALUE ipiv_array = rb_ary_new2(ipiv_size);
   for (size_t i = 0; i < ipiv_size; ++i) {
     rb_ary_store(ipiv_array, i, INT2FIX(ipiv[i]));
@@ -526,13 +865,291 @@
 
   return ipiv_array;
 }
 
 
+/* Call any of the clapack_xpotrf functions as directly as possible.
+ *
+ * You probably don't want to call this function. Instead, why don't you try clapack_potrf, which is more flexible
+ * with its arguments?
+ *
+ * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
+ * handling, so you can easily crash Ruby!
+ *
+ * Returns an array giving the pivot indices (normally these are argument #5).
+ */
+static VALUE nm_clapack_potrf(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE a, VALUE lda) {
+#ifndef HAVE_CLAPACK_H
+  rb_raise(rb_eNotImpError, "potrf currently requires LAPACK");
+#endif
+
+  static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const enum CBLAS_UPLO, const int n, void* a, const int lda) = {
+      NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
+      nm::math::clapack_potrf<float>,
+      nm::math::clapack_potrf<double>,
+#ifdef HAVE_CLAPACK_H
+      clapack_cpotrf, clapack_zpotrf, // call directly, same function signature!
+#else // Especially important for Mac OS, which doesn't seem to include the ATLAS clapack interface.
+      nm::math::clapack_potrf<nm::Complex64>,
+      nm::math::clapack_potrf<nm::Complex128>,
+#endif
+      NULL, NULL, NULL, NULL /*
+      nm::math::clapack_potrf<nm::Rational32>,
+      nm::math::clapack_potrf<nm::Rational64>,
+      nm::math::clapack_potrf<nm::Rational128>,
+      nm::math::clapack_potrf<nm::RubyObject> */
+  };
+
+  if (!ttable[NM_DTYPE(a)]) {
+    rb_raise(rb_eNotImpError, "this operation not yet implemented for non-BLAS dtypes");
+    // FIXME: Once BLAS dtypes are implemented, replace error above with the error below.
+    //rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
+  } else {
+    // Call either our version of potrf or the LAPACK version.
+    ttable[NM_DTYPE(a)](blas_order_sym(order), blas_uplo_sym(uplo), FIX2INT(n), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda));
+  }
+
+  return a;
+}
+
+
 /*
+ * Call any of the clapack_xgetrs functions as directly as possible.
+ */
+static VALUE nm_clapack_getrs(VALUE self, VALUE order, VALUE trans, VALUE n, VALUE nrhs, VALUE a, VALUE lda, VALUE ipiv, VALUE b, VALUE ldb) {
+  static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE Trans, const int N,
+                                       const int NRHS, const void* A, const int lda, const int* ipiv, void* B,
+                                       const int ldb) = {
+      NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
+      nm::math::clapack_getrs<float>,
+      nm::math::clapack_getrs<double>,
+#ifdef HAVE_CLAPACK_H
+      clapack_cgetrs, clapack_zgetrs, // call directly, same function signature!
+#else // Especially important for Mac OS, which doesn't seem to include the ATLAS clapack interface.
+      nm::math::clapack_getrs<nm::Complex64>,
+      nm::math::clapack_getrs<nm::Complex128>,
+#endif
+      nm::math::clapack_getrs<nm::Rational32>,
+      nm::math::clapack_getrs<nm::Rational64>,
+      nm::math::clapack_getrs<nm::Rational128>,
+      nm::math::clapack_getrs<nm::RubyObject>
+  };
+
+  // Allocate the C version of the pivot index array
+  // TODO: Allow for an NVector here also, maybe?
+  int* ipiv_;
+  if (TYPE(ipiv) != T_ARRAY) {
+    rb_raise(rb_eArgError, "ipiv must be of type Array");
+  } else {
+    ipiv_ = ALLOCA_N(int, RARRAY_LEN(ipiv));
+    for (int index = 0; index < RARRAY_LEN(ipiv); ++index) {
+      ipiv_[index] = FIX2INT( RARRAY_PTR(ipiv)[index] );
+    }
+  }
+
+  if (!ttable[NM_DTYPE(a)]) {
+    rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
+  } else {
+
+    // Call either our version of getrs or the LAPACK version.
+    ttable[NM_DTYPE(a)](blas_order_sym(order), blas_transpose_sym(trans), FIX2INT(n), FIX2INT(nrhs), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda),
+                        ipiv_, NM_STORAGE_DENSE(b)->elements, FIX2INT(ldb));
+  }
+
+  // b is both returned and modified directly in the argument list.
+  return b;
+}
+
+
+/*
+ * Call any of the clapack_xpotrs functions as directly as possible.
+ */
+static VALUE nm_clapack_potrs(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE nrhs, VALUE a, VALUE lda, VALUE b, VALUE ldb) {
+  static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const int N,
+                                       const int NRHS, const void* A, const int lda, void* B, const int ldb) = {
+      NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
+      nm::math::clapack_potrs<float,false>,
+      nm::math::clapack_potrs<double,false>,
+#ifdef HAVE_CLAPACK_H
+      clapack_cpotrs, clapack_zpotrs, // call directly, same function signature!
+#else // Especially important for Mac OS, which doesn't seem to include the ATLAS clapack interface.
+      nm::math::clapack_potrs<nm::Complex64,true>,
+      nm::math::clapack_potrs<nm::Complex128,true>,
+#endif
+      nm::math::clapack_potrs<nm::Rational32,false>,
+      nm::math::clapack_potrs<nm::Rational64,false>,
+      nm::math::clapack_potrs<nm::Rational128,false>,
+      nm::math::clapack_potrs<nm::RubyObject,false>
+  };
+
+
+  if (!ttable[NM_DTYPE(a)]) {
+    rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
+  } else {
+
+    // Call either our version of potrs or the LAPACK version.
+    ttable[NM_DTYPE(a)](blas_order_sym(order), blas_uplo_sym(uplo), FIX2INT(n), FIX2INT(nrhs), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda),
+                        NM_STORAGE_DENSE(b)->elements, FIX2INT(ldb));
+  }
+
+  // b is both returned and modified directly in the argument list.
+  return b;
+}
+
+
+/* Call any of the clapack_xgetri functions as directly as possible.
+ *
+ * You probably don't want to call this function. Instead, why don't you try clapack_getri, which is more flexible
+ * with its arguments?
+ *
+ * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
+ * handling, so you can easily crash Ruby!
+ *
+ * Returns an array giving the pivot indices (normally these are argument #5).
+ */
+static VALUE nm_clapack_getri(VALUE self, VALUE order, VALUE n, VALUE a, VALUE lda, VALUE ipiv) {
+#ifndef HAVE_CLAPACK_H
+  rb_raise(rb_eNotImpError, "getri currently requires LAPACK");
+#endif
+
+  static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const int n, void* a, const int lda, const int* ipiv) = {
+      NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
+      nm::math::clapack_getri<float>,
+      nm::math::clapack_getri<double>,
+#ifdef HAVE_CLAPACK_H
+      clapack_cgetri, clapack_zgetri, // call directly, same function signature!
+#else // Especially important for Mac OS, which doesn't seem to include the ATLAS clapack interface.
+      nm::math::clapack_getri<nm::Complex64>,
+      nm::math::clapack_getri<nm::Complex128>,
+#endif
+      NULL, NULL, NULL, NULL /*
+      nm::math::clapack_getri<nm::Rational32>,
+      nm::math::clapack_getri<nm::Rational64>,
+      nm::math::clapack_getri<nm::Rational128>,
+      nm::math::clapack_getri<nm::RubyObject> */
+  };
+
+  // Allocate the C version of the pivot index array
+  // TODO: Allow for an NVector here also, maybe?
+  int* ipiv_;
+  if (TYPE(ipiv) != T_ARRAY) {
+    rb_raise(rb_eArgError, "ipiv must be of type Array");
+  } else {
+    ipiv_ = ALLOCA_N(int, RARRAY_LEN(ipiv));
+    for (int index = 0; index < RARRAY_LEN(ipiv); ++index) {
+      ipiv_[index] = FIX2INT( RARRAY_PTR(ipiv)[index] );
+    }
+  }
+
+  if (!ttable[NM_DTYPE(a)]) {
+    rb_raise(rb_eNotImpError, "this operation not yet implemented for non-BLAS dtypes");
+    // FIXME: Once BLAS dtypes are implemented, replace error above with the error below.
+    //rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
+  } else {
+    // Call either our version of getri or the LAPACK version.
+    ttable[NM_DTYPE(a)](blas_order_sym(order), FIX2INT(n), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), ipiv_);
+  }
+
+  return a;
+}
+
+
+/* Call any of the clapack_xpotri functions as directly as possible.
+ *
+ * You probably don't want to call this function. Instead, why don't you try clapack_potri, which is more flexible
+ * with its arguments?
+ *
+ * This function does almost no type checking. Seriously, be really careful when you call it! There's no exception
+ * handling, so you can easily crash Ruby!
+ *
+ * Returns an array giving the pivot indices (normally these are argument #5).
+ */
+static VALUE nm_clapack_potri(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE a, VALUE lda) {
+#ifndef HAVE_CLAPACK_H
+  rb_raise(rb_eNotImpError, "getri currently requires LAPACK");
+#endif
+
+  static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const enum CBLAS_UPLO, const int n, void* a, const int lda) = {
+      NULL, NULL, NULL, NULL, NULL, // integers not allowed due to division
+      nm::math::clapack_potri<float>,
+      nm::math::clapack_potri<double>,
+#ifdef HAVE_CLAPACK_H
+      clapack_cpotri, clapack_zpotri, // call directly, same function signature!
+#else // Especially important for Mac OS, which doesn't seem to include the ATLAS clapack interface.
+      nm::math::clapack_potri<nm::Complex64>,
+      nm::math::clapack_potri<nm::Complex128>,
+#endif
+      NULL, NULL, NULL, NULL /*
+      nm::math::clapack_getri<nm::Rational32>,
+      nm::math::clapack_getri<nm::Rational64>,
+      nm::math::clapack_getri<nm::Rational128>,
+      nm::math::clapack_getri<nm::RubyObject> */
+  };
+
+  if (!ttable[NM_DTYPE(a)]) {
+    rb_raise(rb_eNotImpError, "this operation not yet implemented for non-BLAS dtypes");
+    // FIXME: Once BLAS dtypes are implemented, replace error above with the error below.
+    //rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
+  } else {
+    // Call either our version of getri or the LAPACK version.
+    ttable[NM_DTYPE(a)](blas_order_sym(order), blas_uplo_sym(uplo), FIX2INT(n), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda));
+  }
+
+  return a;
+}
+
+
+/*
+ * Call any of the clapack_xlaswp functions as directly as possible.
+ *
+ * Note that LAPACK's xlaswp functions accept a column-order matrix, but NMatrix uses row-order. Thus, n should be the
+ * number of rows and lda should be the number of columns, no matter what it says in the documentation for dlaswp.f.
+ */
+static VALUE nm_clapack_laswp(VALUE self, VALUE n, VALUE a, VALUE lda, VALUE k1, VALUE k2, VALUE ipiv, VALUE incx) {
+  static void (*ttable[nm::NUM_DTYPES])(const int n, void* a, const int lda, const int k1, const int k2, const int* ipiv, const int incx) = {
+      nm::math::clapack_laswp<uint8_t>,
+      nm::math::clapack_laswp<int8_t>,
+      nm::math::clapack_laswp<int16_t>,
+      nm::math::clapack_laswp<int32_t>,
+      nm::math::clapack_laswp<int64_t>,
+      nm::math::clapack_laswp<float>,
+      nm::math::clapack_laswp<double>,
+//#ifdef HAVE_CLAPACK_H // laswp doesn't actually exist in clapack.h!
+//      clapack_claswp, clapack_zlaswp, // call directly, same function signature!
+//#else // Especially important for Mac OS, which doesn't seem to include the ATLAS clapack interface.
+      nm::math::clapack_laswp<nm::Complex64>,
+      nm::math::clapack_laswp<nm::Complex128>,
+//#endif
+      nm::math::clapack_laswp<nm::Rational32>,
+      nm::math::clapack_laswp<nm::Rational64>,
+      nm::math::clapack_laswp<nm::Rational128>,
+      nm::math::clapack_laswp<nm::RubyObject>
+  };
+
+  // Allocate the C version of the pivot index array
+  // TODO: Allow for an NVector here also, maybe?
+  int* ipiv_;
+  if (TYPE(ipiv) != T_ARRAY) {
+    rb_raise(rb_eArgError, "ipiv must be of type Array");
+  } else {
+    ipiv_ = ALLOCA_N(int, RARRAY_LEN(ipiv));
+    for (int index = 0; index < RARRAY_LEN(ipiv); ++index) {
+      ipiv_[index] = FIX2INT( RARRAY_PTR(ipiv)[index] );
+    }
+  }
+
+  // Call either our version of laswp or the LAPACK version.
+  ttable[NM_DTYPE(a)](FIX2INT(n), NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), FIX2INT(k1), FIX2INT(k2), ipiv_, FIX2INT(incx));
+
+  // a is both returned and modified directly in the argument list.
+  return a;
+}
+
+
+/*
  * C accessor for calculating an exact determinant.
  */
-void nm_math_det_exact(const int M, const void* elements, const int lda, dtype_t dtype, void* result) {
+void nm_math_det_exact(const int M, const void* elements, const int lda, nm::dtype_t dtype, void* result) {
   NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::det_exact, void, const int M, const void* A_elements, const int lda, void* result_arg);
 
   ttable[dtype](M, elements, lda, result);
 }