ext/nmatrix/math/math.h in nmatrix-0.0.8 vs ext/nmatrix/math/math.h in nmatrix-0.0.9
- old
+ new
@@ -203,11 +203,11 @@
}
// Yale: numeric matrix multiply c=a*b
-template <typename DType, typename IType>
+template <typename DType>
inline void numbmm(const unsigned int n, const unsigned int m, const unsigned int l, const IType* ia, const IType* ja, const DType* a, const bool diaga,
const IType* ib, const IType* jb, const DType* b, const bool diagb, IType* ic, IType* jc, DType* c, const bool diagc) {
const unsigned int max_lmn = std::max(std::max(m, n), l);
IType next[max_lmn];
DType sums[max_lmn];
@@ -321,11 +321,10 @@
}
}
*/
// Yale: Symbolic matrix multiply c=a*b
-template <typename IType>
inline size_t symbmm(const unsigned int n, const unsigned int m, const unsigned int l, const IType* ia, const IType* ja, const bool diaga,
const IType* ib, const IType* jb, const bool diagb, IType* ic, const bool diagc) {
unsigned int max_lmn = std::max(std::max(m,n), l);
IType mask[max_lmn]; // INDEX in the SMMP paper.
IType j, k; /* Local variables */
@@ -376,19 +375,19 @@
// In-place quicksort (from Wikipedia) -- called by smmp_sort_columns, below. All functions are inclusive of left, right.
namespace smmp_sort {
const size_t THRESHOLD = 4; // switch to insertion sort for 4 elements or fewer
- template <typename DType, typename IType>
+ template <typename DType>
void print_array(DType* vals, IType* array, IType left, IType right) {
for (IType i = left; i <= right; ++i) {
std::cerr << array[i] << ":" << vals[i] << " ";
}
std::cerr << std::endl;
}
- template <typename DType, typename IType>
+ template <typename DType>
IType partition(DType* vals, IType* array, IType left, IType right, IType pivot) {
IType pivotJ = array[pivot];
DType pivotV = vals[pivot];
// Swap pivot and right
@@ -412,12 +411,12 @@
return store;
}
// Recommended to use the median of left, right, and mid for the pivot.
- template <typename IType>
- IType median(IType a, IType b, IType c) {
+ template <typename I>
+ inline I median(I a, I b, I c) {
if (a < b) {
if (b < c) return b; // a b c
if (a < c) return c; // a c b
return a; // c a b
@@ -428,11 +427,11 @@
}
}
// Insertion sort is more efficient than quicksort for small N
- template <typename DType, typename IType>
+ template <typename DType>
void insertion_sort(DType* vals, IType* array, IType left, IType right) {
for (IType idx = left; idx <= right; ++idx) {
IType col_to_insert = array[idx];
DType val_to_insert = vals[idx];
@@ -446,26 +445,26 @@
vals[hole_pos] = val_to_insert;
}
}
- template <typename DType, typename IType>
+ template <typename DType>
void quicksort(DType* vals, IType* array, IType left, IType right) {
if (left < right) {
if (right - left < THRESHOLD) {
insertion_sort(vals, array, left, right);
} else {
// choose any pivot such that left < pivot < right
- IType pivot = median(left, right, (IType)(((unsigned long)left + (unsigned long)right) / 2));
+ IType pivot = median<IType>(left, right, (IType)(((unsigned long)left + (unsigned long)right) / 2));
pivot = partition(vals, array, left, right, pivot);
// recursively sort elements smaller than the pivot
- quicksort<DType,IType>(vals, array, left, pivot-1);
+ quicksort<DType>(vals, array, left, pivot-1);
// recursively sort elements at least as big as the pivot
- quicksort<DType,IType>(vals, array, pivot+1, right);
+ quicksort<DType>(vals, array, pivot+1, right);
}
}
}
@@ -481,109 +480,20 @@
*
* TODO: It's actually probably possible to write an even faster sort, since symbmm/numbmm are not producing a random
* ordering. If someone is doing a lot of Yale matrix multiplication, it might benefit them to consider even insertion
* sort.
*/
-template <typename DType, typename IType>
+template <typename DType>
inline void smmp_sort_columns(const size_t n, const IType* ia, IType* ja, DType* a) {
for (size_t i = 0; i < n; ++i) {
if (ia[i+1] - ia[i] < 2) continue; // no need to sort rows containing only one or two elements.
else if (ia[i+1] - ia[i] <= smmp_sort::THRESHOLD) {
- smmp_sort::insertion_sort<DType, IType>(a, ja, ia[i], ia[i+1]-1); // faster for small rows
+ smmp_sort::insertion_sort<DType>(a, ja, ia[i], ia[i+1]-1); // faster for small rows
} else {
- smmp_sort::quicksort<DType, IType>(a, ja, ia[i], ia[i+1]-1); // faster for large rows (and may call insertion_sort as well)
+ smmp_sort::quicksort<DType>(a, ja, ia[i], ia[i+1]-1); // faster for large rows (and may call insertion_sort as well)
}
}
}
-
-
-
-/*
- * Transposes a generic Yale matrix (old or new). Specify new by setting diaga = true.
- *
- * Based on transp from SMMP (same as symbmm and numbmm).
- *
- * This is not named in the same way as most yale_storage functions because it does not act on a YALE_STORAGE
- * object.
- */
-template <typename DType, typename IType>
-void transpose_yale(const size_t n, const size_t m, const void* ia_, const void* ja_, const void* a_,
- const bool diaga, void* ib_, void* jb_, void* b_, const bool move)
-{
- const IType *ia = reinterpret_cast<const IType*>(ia_),
- *ja = reinterpret_cast<const IType*>(ja_);
- const DType *a = reinterpret_cast<const DType*>(a_);
-
- IType *ib = reinterpret_cast<IType*>(ib_),
- *jb = reinterpret_cast<IType*>(jb_);
- DType *b = reinterpret_cast<DType*>(b_);
-
-
-
- size_t index;
-
- // Clear B
- for (size_t i = 0; i < m+1; ++i) ib[i] = 0;
-
- if (move)
- for (size_t i = 0; i < m+1; ++i) b[i] = 0;
-
- if (diaga) ib[0] = m + 1;
- else ib[0] = 0;
-
- /* count indices for each column */
-
- for (size_t i = 0; i < n; ++i) {
- for (size_t j = ia[i]; j < ia[i+1]; ++j) {
- ++(ib[ja[j]+1]);
- }
- }
-
- for (size_t i = 0; i < m; ++i) {
- ib[i+1] = ib[i] + ib[i+1];
- }
-
- /* now make jb */
-
- for (size_t i = 0; i < n; ++i) {
-
- for (size_t j = ia[i]; j < ia[i+1]; ++j) {
- index = ja[j];
- jb[ib[index]] = i;
-
- if (move)
- b[ib[index]] = a[j];
-
- ++(ib[index]);
- }
- }
-
- /* now fixup ib */
-
- for (size_t i = m; i >= 1; --i) {
- ib[i] = ib[i-1];
- }
-
-
- if (diaga) {
- if (move) {
- size_t j = std::min(n,m);
-
- for (size_t i = 0; i < j; ++i) {
- b[i] = a[i];
- }
- }
- ib[0] = m + 1;
-
- } else {
- ib[0] = 0;
- }
-}
-
-
-
-
-
/*
* From ATLAS 3.8.0:
*