#include "libscl.h" #include extern "C" { #include "cblas.h" } using namespace scl; using namespace std; using std::cout; namespace { ostream& maxerr(ostream& os, const realmat& C_save, const realmat& C) { REAL rv = 0.0; for (INTEGER i=1; i<=C.size(); ++i) { REAL err = abs(C[i]-C_save[i])/(abs(C_save[i])+1.0e-5); rv = rv < err ? err : rv; } os << "maxerr = " << rv; return os; } } int main(int argc, char** argp, char** envp) { if (argc != 4) { cerr << "Usage: " << argp[0] << " M N K" << '\n'; return 1; } INTEGER M = atoi(argp[1]); INTEGER N = atoi(argp[2]); INTEGER K = atoi(argp[3]); cout << "M, N, K = " << M << ' ' << N << ' ' << K << '\n'; INTEGER MN=M*N; realmat A(M,K),B(K,N),C(M,N); INT_32BIT seed = 5874; for (INTEGER k=1; k<=K; ++k) { for (INTEGER m=1; m<=M; ++m) A(m,k) = unsk(seed); for (INTEGER n=1; n<=N; ++n) B(k,n) = unsk(seed); } cout << "Using libscl's C = A*B" << '\n'; clock_t clock_start, clock_stop; clock_start = clock(); C = A*B; clock_stop = clock(); cout << "clock = " << REAL(clock_stop - clock_start)/REAL(CLOCKS_PER_SEC) << '\n'; realmat C_save = C; cout << "Using loop unrolling" << '\n'; clock_start = clock(); for (INTEGER i=1; i<=MN; ++i) C[i] = 0.0; INTEGER M0=8*(M/8); INTEGER M1=M-M%8+1; for (INTEGER k=1; k<=K; ++k) { for (INTEGER j=1; j<=N; ++j) { for (INTEGER i=0; i 0) ; } } } clock_stop = clock(); cout << "clock = " << REAL(clock_stop - clock_start)/REAL(CLOCKS_PER_SEC) << '\n'; maxerr(cout,C_save,C) << '\n'; cout << "Using no acceleration techniques" << '\n'; clock_start = clock(); for (INTEGER i=1; i<=MN; ++i) C[i] = 0.0; for (INTEGER k=1; k<=K; ++k) { for (INTEGER j=1; j<=N; ++j) { for (INTEGER i=1; i<=M; ++i) { C(i,j) += A(i,k)*B(k,j); } } } clock_stop = clock(); cout << "clock = " << REAL(clock_stop - clock_start)/REAL(CLOCKS_PER_SEC) << '\n'; maxerr(cout,C_save,C) << '\n'; if (K == 1) { cout << "Using cblas_dgemv" << '\n'; } else { cout << "Using cblas_dgemm" << '\n'; } clock_start = clock(); if (K == 1) { cblas_dgemv(CblasColMajor, CblasNoTrans, M, K, 1.0, A.begin(), M, B.begin(), 1, 0.0, C.begin(), 1); } else { cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, K, 1.0, A.begin(), M, B.begin(), K, 0.0, C.begin(), M); } clock_stop = clock(); cout << "clock = " << REAL(clock_stop - clock_start)/REAL(CLOCKS_PER_SEC) << '\n'; maxerr(cout,C_save,C) << '\n'; cout << "Using dgmprd" << '\n'; clock_start = clock(); dgmprd(A,B,C); clock_stop = clock(); cout << "clock = " << REAL(clock_stop - clock_start)/REAL(CLOCKS_PER_SEC) << '\n'; maxerr(cout,C_save,C) << '\n'; return 0; }