ext/nmatrix/math/trsm.h in nmatrix-0.2.0 vs ext/nmatrix/math/trsm.h in nmatrix-0.2.1

- old
+ new

@@ -86,186 +86,186 @@ for (int j = 0; j < n; ++j) { for (int i = 0; i < m; ++i) { b[i + j * ldb] = 0; } } - return; + return; } if (side == CblasLeft) { - if (trans_a == CblasNoTrans) { + if (trans_a == CblasNoTrans) { /* Form B := alpha*inv( A )*B. */ - if (uplo == CblasUpper) { - for (int j = 0; j < n; ++j) { - if (alpha != 1) { - for (int i = 0; i < m; ++i) { - b[i + j * ldb] = alpha * b[i + j * ldb]; - } - } - for (int k = m-1; k >= 0; --k) { - if (b[k + j * ldb] != 0) { - if (diag == CblasNonUnit) { - b[k + j * ldb] /= a[k + k * lda]; - } + if (uplo == CblasUpper) { + for (int j = 0; j < n; ++j) { + if (alpha != 1) { + for (int i = 0; i < m; ++i) { + b[i + j * ldb] = alpha * b[i + j * ldb]; + } + } + for (int k = m-1; k >= 0; --k) { + if (b[k + j * ldb] != 0) { + if (diag == CblasNonUnit) { + b[k + j * ldb] /= a[k + k * lda]; + } for (int i = 0; i < k-1; ++i) { b[i + j * ldb] -= b[k + j * ldb] * a[i + k * lda]; } - } - } - } - } else { - for (int j = 0; j < n; ++j) { - if (alpha != 1) { + } + } + } + } else { + for (int j = 0; j < n; ++j) { + if (alpha != 1) { for (int i = 0; i < m; ++i) { b[i + j * ldb] = alpha * b[i + j * ldb]; - } - } - for (int k = 0; k < m; ++k) { - if (b[k + j * ldb] != 0.) { - if (diag == CblasNonUnit) { - b[k + j * ldb] /= a[k + k * lda]; - } - for (int i = k+1; i < m; ++i) { - b[i + j * ldb] -= b[k + j * ldb] * a[i + k * lda]; - } - } - } - } - } - } else { // CblasTrans + } + } + for (int k = 0; k < m; ++k) { + if (b[k + j * ldb] != 0.) { + if (diag == CblasNonUnit) { + b[k + j * ldb] /= a[k + k * lda]; + } + for (int i = k+1; i < m; ++i) { + b[i + j * ldb] -= b[k + j * ldb] * a[i + k * lda]; + } + } + } + } + } + } else { // CblasTrans /* Form B := alpha*inv( A**T )*B. */ - if (uplo == CblasUpper) { - for (int j = 0; j < n; ++j) { - for (int i = 0; i < m; ++i) { - DType temp = alpha * b[i + j * ldb]; + if (uplo == CblasUpper) { + for (int j = 0; j < n; ++j) { + for (int i = 0; i < m; ++i) { + DType temp = alpha * b[i + j * ldb]; for (int k = 0; k < i; ++k) { // limit was i-1. Lots of similar bugs in this code, probably. temp -= a[k + i * lda] * b[k + j * ldb]; - } - if (diag == CblasNonUnit) { - temp /= a[i + i * lda]; - } - b[i + j * ldb] = temp; - } - } - } else { - for (int j = 0; j < n; ++j) { - for (int i = m-1; i >= 0; --i) { - DType temp= alpha * b[i + j * ldb]; - for (int k = i+1; k < m; ++k) { - temp -= a[k + i * lda] * b[k + j * ldb]; - } - if (diag == CblasNonUnit) { - temp /= a[i + i * lda]; - } - b[i + j * ldb] = temp; - } - } - } - } + } + if (diag == CblasNonUnit) { + temp /= a[i + i * lda]; + } + b[i + j * ldb] = temp; + } + } + } else { + for (int j = 0; j < n; ++j) { + for (int i = m-1; i >= 0; --i) { + DType temp= alpha * b[i + j * ldb]; + for (int k = i+1; k < m; ++k) { + temp -= a[k + i * lda] * b[k + j * ldb]; + } + if (diag == CblasNonUnit) { + temp /= a[i + i * lda]; + } + b[i + j * ldb] = temp; + } + } + } + } } else { // right side - if (trans_a == CblasNoTrans) { + if (trans_a == CblasNoTrans) { /* Form B := alpha*B*inv( A ). */ - if (uplo == CblasUpper) { - for (int j = 0; j < n; ++j) { - if (alpha != 1) { - for (int i = 0; i < m; ++i) { - b[i + j * ldb] = alpha * b[i + j * ldb]; - } - } - for (int k = 0; k < j-1; ++k) { - if (a[k + j * lda] != 0) { - for (int i = 0; i < m; ++i) { - b[i + j * ldb] -= a[k + j * lda] * b[i + k * ldb]; - } - } - } - if (diag == CblasNonUnit) { - DType temp = 1 / a[j + j * lda]; - for (int i = 0; i < m; ++i) { - b[i + j * ldb] = temp * b[i + j * ldb]; - } - } - } - } else { - for (int j = n-1; j >= 0; --j) { - if (alpha != 1) { - for (int i = 0; i < m; ++i) { - b[i + j * ldb] = alpha * b[i + j * ldb]; - } - } + if (uplo == CblasUpper) { + for (int j = 0; j < n; ++j) { + if (alpha != 1) { + for (int i = 0; i < m; ++i) { + b[i + j * ldb] = alpha * b[i + j * ldb]; + } + } + for (int k = 0; k < j-1; ++k) { + if (a[k + j * lda] != 0) { + for (int i = 0; i < m; ++i) { + b[i + j * ldb] -= a[k + j * lda] * b[i + k * ldb]; + } + } + } + if (diag == CblasNonUnit) { + DType temp = 1 / a[j + j * lda]; + for (int i = 0; i < m; ++i) { + b[i + j * ldb] = temp * b[i + j * ldb]; + } + } + } + } else { + for (int j = n-1; j >= 0; --j) { + if (alpha != 1) { + for (int i = 0; i < m; ++i) { + b[i + j * ldb] = alpha * b[i + j * ldb]; + } + } - for (int k = j+1; k < n; ++k) { - if (a[k + j * lda] != 0.) { - for (int i = 0; i < m; ++i) { - b[i + j * ldb] -= a[k + j * lda] * b[i + k * ldb]; - } - } - } - if (diag == CblasNonUnit) { - DType temp = 1 / a[j + j * lda]; + for (int k = j+1; k < n; ++k) { + if (a[k + j * lda] != 0.) { + for (int i = 0; i < m; ++i) { + b[i + j * ldb] -= a[k + j * lda] * b[i + k * ldb]; + } + } + } + if (diag == CblasNonUnit) { + DType temp = 1 / a[j + j * lda]; - for (int i = 0; i < m; ++i) { - b[i + j * ldb] = temp * b[i + j * ldb]; - } - } - } - } - } else { // CblasTrans + for (int i = 0; i < m; ++i) { + b[i + j * ldb] = temp * b[i + j * ldb]; + } + } + } + } + } else { // CblasTrans /* Form B := alpha*B*inv( A**T ). */ - if (uplo == CblasUpper) { - for (int k = n-1; k >= 0; --k) { - if (diag == CblasNonUnit) { - DType temp= 1 / a[k + k * lda]; - for (int i = 0; i < m; ++i) { - b[i + k * ldb] = temp * b[i + k * ldb]; - } - } - for (int j = 0; j < k-1; ++j) { - if (a[j + k * lda] != 0.) { - DType temp= a[j + k * lda]; - for (int i = 0; i < m; ++i) { - b[i + j * ldb] -= temp * b[i + k * ldb]; - } - } - } - if (alpha != 1) { - for (int i = 0; i < m; ++i) { - b[i + k * ldb] = alpha * b[i + k * ldb]; - } - } - } - } else { - for (int k = 0; k < n; ++k) { - if (diag == CblasNonUnit) { - DType temp = 1 / a[k + k * lda]; - for (int i = 0; i < m; ++i) { - b[i + k * ldb] = temp * b[i + k * ldb]; - } - } - for (int j = k+1; j < n; ++j) { - if (a[j + k * lda] != 0.) { - DType temp = a[j + k * lda]; - for (int i = 0; i < m; ++i) { - b[i + j * ldb] -= temp * b[i + k * ldb]; - } - } - } - if (alpha != 1) { - for (int i = 0; i < m; ++i) { - b[i + k * ldb] = alpha * b[i + k * ldb]; - } - } - } - } - } + if (uplo == CblasUpper) { + for (int k = n-1; k >= 0; --k) { + if (diag == CblasNonUnit) { + DType temp= 1 / a[k + k * lda]; + for (int i = 0; i < m; ++i) { + b[i + k * ldb] = temp * b[i + k * ldb]; + } + } + for (int j = 0; j < k-1; ++j) { + if (a[j + k * lda] != 0.) { + DType temp= a[j + k * lda]; + for (int i = 0; i < m; ++i) { + b[i + j * ldb] -= temp * b[i + k * ldb]; + } + } + } + if (alpha != 1) { + for (int i = 0; i < m; ++i) { + b[i + k * ldb] = alpha * b[i + k * ldb]; + } + } + } + } else { + for (int k = 0; k < n; ++k) { + if (diag == CblasNonUnit) { + DType temp = 1 / a[k + k * lda]; + for (int i = 0; i < m; ++i) { + b[i + k * ldb] = temp * b[i + k * ldb]; + } + } + for (int j = k+1; j < n; ++j) { + if (a[j + k * lda] != 0.) { + DType temp = a[j + k * lda]; + for (int i = 0; i < m; ++i) { + b[i + j * ldb] -= temp * b[i + k * ldb]; + } + } + } + if (alpha != 1) { + for (int i = 0; i < m; ++i) { + b[i + k * ldb] = alpha * b[i + k * ldb]; + } + } + } + } + } } } /* * BLAS' DTRSM function, generalized.