ext/nmatrix/math/math.h in nmatrix-0.0.8 vs ext/nmatrix/math/math.h in nmatrix-0.0.9

- old
+ new

@@ -203,11 +203,11 @@ } // Yale: numeric matrix multiply c=a*b -template <typename DType, typename IType> +template <typename DType> inline void numbmm(const unsigned int n, const unsigned int m, const unsigned int l, const IType* ia, const IType* ja, const DType* a, const bool diaga, const IType* ib, const IType* jb, const DType* b, const bool diagb, IType* ic, IType* jc, DType* c, const bool diagc) { const unsigned int max_lmn = std::max(std::max(m, n), l); IType next[max_lmn]; DType sums[max_lmn]; @@ -321,11 +321,10 @@ } } */ // Yale: Symbolic matrix multiply c=a*b -template <typename IType> inline size_t symbmm(const unsigned int n, const unsigned int m, const unsigned int l, const IType* ia, const IType* ja, const bool diaga, const IType* ib, const IType* jb, const bool diagb, IType* ic, const bool diagc) { unsigned int max_lmn = std::max(std::max(m,n), l); IType mask[max_lmn]; // INDEX in the SMMP paper. IType j, k; /* Local variables */ @@ -376,19 +375,19 @@ // In-place quicksort (from Wikipedia) -- called by smmp_sort_columns, below. All functions are inclusive of left, right. namespace smmp_sort { const size_t THRESHOLD = 4; // switch to insertion sort for 4 elements or fewer - template <typename DType, typename IType> + template <typename DType> void print_array(DType* vals, IType* array, IType left, IType right) { for (IType i = left; i <= right; ++i) { std::cerr << array[i] << ":" << vals[i] << " "; } std::cerr << std::endl; } - template <typename DType, typename IType> + template <typename DType> IType partition(DType* vals, IType* array, IType left, IType right, IType pivot) { IType pivotJ = array[pivot]; DType pivotV = vals[pivot]; // Swap pivot and right @@ -412,12 +411,12 @@ return store; } // Recommended to use the median of left, right, and mid for the pivot. - template <typename IType> - IType median(IType a, IType b, IType c) { + template <typename I> + inline I median(I a, I b, I c) { if (a < b) { if (b < c) return b; // a b c if (a < c) return c; // a c b return a; // c a b @@ -428,11 +427,11 @@ } } // Insertion sort is more efficient than quicksort for small N - template <typename DType, typename IType> + template <typename DType> void insertion_sort(DType* vals, IType* array, IType left, IType right) { for (IType idx = left; idx <= right; ++idx) { IType col_to_insert = array[idx]; DType val_to_insert = vals[idx]; @@ -446,26 +445,26 @@ vals[hole_pos] = val_to_insert; } } - template <typename DType, typename IType> + template <typename DType> void quicksort(DType* vals, IType* array, IType left, IType right) { if (left < right) { if (right - left < THRESHOLD) { insertion_sort(vals, array, left, right); } else { // choose any pivot such that left < pivot < right - IType pivot = median(left, right, (IType)(((unsigned long)left + (unsigned long)right) / 2)); + IType pivot = median<IType>(left, right, (IType)(((unsigned long)left + (unsigned long)right) / 2)); pivot = partition(vals, array, left, right, pivot); // recursively sort elements smaller than the pivot - quicksort<DType,IType>(vals, array, left, pivot-1); + quicksort<DType>(vals, array, left, pivot-1); // recursively sort elements at least as big as the pivot - quicksort<DType,IType>(vals, array, pivot+1, right); + quicksort<DType>(vals, array, pivot+1, right); } } } @@ -481,109 +480,20 @@ * * TODO: It's actually probably possible to write an even faster sort, since symbmm/numbmm are not producing a random * ordering. If someone is doing a lot of Yale matrix multiplication, it might benefit them to consider even insertion * sort. */ -template <typename DType, typename IType> +template <typename DType> inline void smmp_sort_columns(const size_t n, const IType* ia, IType* ja, DType* a) { for (size_t i = 0; i < n; ++i) { if (ia[i+1] - ia[i] < 2) continue; // no need to sort rows containing only one or two elements. else if (ia[i+1] - ia[i] <= smmp_sort::THRESHOLD) { - smmp_sort::insertion_sort<DType, IType>(a, ja, ia[i], ia[i+1]-1); // faster for small rows + smmp_sort::insertion_sort<DType>(a, ja, ia[i], ia[i+1]-1); // faster for small rows } else { - smmp_sort::quicksort<DType, IType>(a, ja, ia[i], ia[i+1]-1); // faster for large rows (and may call insertion_sort as well) + smmp_sort::quicksort<DType>(a, ja, ia[i], ia[i+1]-1); // faster for large rows (and may call insertion_sort as well) } } } - - - -/* - * Transposes a generic Yale matrix (old or new). Specify new by setting diaga = true. - * - * Based on transp from SMMP (same as symbmm and numbmm). - * - * This is not named in the same way as most yale_storage functions because it does not act on a YALE_STORAGE - * object. - */ -template <typename DType, typename IType> -void transpose_yale(const size_t n, const size_t m, const void* ia_, const void* ja_, const void* a_, - const bool diaga, void* ib_, void* jb_, void* b_, const bool move) -{ - const IType *ia = reinterpret_cast<const IType*>(ia_), - *ja = reinterpret_cast<const IType*>(ja_); - const DType *a = reinterpret_cast<const DType*>(a_); - - IType *ib = reinterpret_cast<IType*>(ib_), - *jb = reinterpret_cast<IType*>(jb_); - DType *b = reinterpret_cast<DType*>(b_); - - - - size_t index; - - // Clear B - for (size_t i = 0; i < m+1; ++i) ib[i] = 0; - - if (move) - for (size_t i = 0; i < m+1; ++i) b[i] = 0; - - if (diaga) ib[0] = m + 1; - else ib[0] = 0; - - /* count indices for each column */ - - for (size_t i = 0; i < n; ++i) { - for (size_t j = ia[i]; j < ia[i+1]; ++j) { - ++(ib[ja[j]+1]); - } - } - - for (size_t i = 0; i < m; ++i) { - ib[i+1] = ib[i] + ib[i+1]; - } - - /* now make jb */ - - for (size_t i = 0; i < n; ++i) { - - for (size_t j = ia[i]; j < ia[i+1]; ++j) { - index = ja[j]; - jb[ib[index]] = i; - - if (move) - b[ib[index]] = a[j]; - - ++(ib[index]); - } - } - - /* now fixup ib */ - - for (size_t i = m; i >= 1; --i) { - ib[i] = ib[i-1]; - } - - - if (diaga) { - if (move) { - size_t j = std::min(n,m); - - for (size_t i = 0; i < j; ++i) { - b[i] = a[i]; - } - } - ib[0] = m + 1; - - } else { - ib[0] = 0; - } -} - - - - - /* * From ATLAS 3.8.0: *