35 #ifndef MADNESS_TENSOR_MXM_H__INCLUDED 
   36 #define MADNESS_TENSOR_MXM_H__INCLUDED 
   57 template <
typename T, 
typename Q, 
typename S>
 
   58 static inline void mxm(
long dimi, 
long dimj, 
long dimk,
 
   68     for (
long i=0; i<dimi; ++i) {
 
   69         for (
long k=0; 
k<dimk; ++
k) {
 
   71 #pragma _CRI prefervector 
   73             for (
long j=0; j<dimj; ++j) {
 
   74                 c[i*dimj+j] += a[i*dimk+
k]*b[
k*dimj+j];
 
   82 template <
typename T, 
typename Q, 
typename S>
 
   84 void mTxm(
long dimi, 
long dimj, 
long dimk,
 
   96     for (
long k=0; 
k<dimk; ++
k) {
 
   97         for (
long j=0; j<dimj; ++j) {
 
   99 #pragma _CRI prefervector 
  101             for (
long i=0; i<dimi; ++i) {
 
  102                 c[i*dimj+j] += a[
k*dimi+i]*b[
k*dimj+j];
 
  109 template <
typename T, 
typename Q, 
typename S>
 
  110 static inline void mxmT(
long dimi, 
long dimj, 
long dimk,
 
  122     for (
long i=0; i<dimi; ++i) {
 
  123         for (
long j=0; j<dimj; ++j) {
 
  125             for (
long k=0; 
k<dimk; ++
k) {
 
  126                 sum += a[i*dimk+
k]*b[j*dimk+
k];
 
  134 template <
typename T, 
typename Q, 
typename S>
 
  135 static inline void mTxmT(
long dimi, 
long dimj, 
long dimk,
 
  145     for (
long i=0; i<dimi; ++i) {
 
  147 #pragma _CRI prefervector 
  149         for (
long j=0; j<dimj; ++j) {
 
  150             for (
long k=0; 
k<dimk; ++
k) {
 
  151                 c[i*dimj+j] += a[
k*dimi+i]*b[j*dimk+
k];
 
  160 void mTxm(
long dimi, 
long dimj, 
long dimk, 
double* 
restrict c,
 
  168 inline void mTxm(
long dimi, 
long dimj, 
long dimk,
 
  183     long dimk4 = (dimk/4)*4;
 
  184     for (
long i=0; i<dimi; ++i,c+=dimj) {
 
  185         const double* ai = a+i;
 
  187         for (
long k=0; 
k<dimk4; 
k+=4,ai+=4*dimi,p+=4*dimj) {
 
  188             double ak0i = ai[0   ];
 
  189             double ak1i = ai[dimi];
 
  190             double ak2i = ai[dimi+dimi];
 
  191             double ak3i = ai[dimi+dimi+dimi];
 
  192             const double* bk0 = p;
 
  193             const double* bk1 = p+dimj;
 
  194             const double* bk2 = p+dimj+dimj;
 
  195             const double* bk3 = p+dimj+dimj+dimj;
 
  196             for (
long j=0; j<dimj; ++j) {
 
  197                 c[j] += ak0i*bk0[j] + ak1i*bk1[j] + ak2i*bk2[j] + ak3i*bk3[j];
 
  200         for (
long k=dimk4; 
k<dimk; ++
k) {
 
  201             double aki = a[
k*dimi+i];
 
  202             const double* bk = b+
k*dimj;
 
  203             for (
long j=0; j<dimj; ++j) {
 
  215 inline void mxmT(
long dimi, 
long dimj, 
long dimk,
 
  230     long dimi2 = (dimi/2)*2;
 
  231     for (
long i=0; i<dimi2; i+=2) {
 
  232         const double* ai0 = a+i*dimk;
 
  233         const double* ai1 = a+i*dimk+dimk;
 
  235         double* 
restrict ci1 = c+i*dimj+dimj;
 
  236         for (
long j=0; j<dimj; ++j) {
 
  239             const double* bj = b + j*dimk;
 
  240             for (
long k=0; 
k<dimk; ++
k) {
 
  241                 sum0 += ai0[
k]*bj[
k];
 
  242                 sum1 += ai1[
k]*bj[
k];
 
  248     for (
long i=dimi2; i<dimi; ++i) {
 
  249         const double* ai = a+i*dimk;
 
  251         for (
long j=0; j<dimj; ++j) {
 
  253             const double* bj = b+j*dimk;
 
  254             for (
long k=0; 
k<dimk; ++
k) {
 
  264 inline void mxm(
long dimi, 
long dimj, 
long dimk,
 
  276     long dimk4 = (dimk/4)*4;
 
  277     for (
long i=0; i<dimi; ++i, c+=dimj,a+=dimk) {
 
  279         for (
long k=0; 
k<dimk4; 
k+=4,p+=4*dimj) {
 
  281             double aik1 = a[
k+1];
 
  282             double aik2 = a[
k+2];
 
  283             double aik3 = a[
k+3];
 
  284             const double* bk0 = p;
 
  285             const double* bk1 = bk0+dimj;
 
  286             const double* bk2 = bk1+dimj;
 
  287             const double* bk3 = bk2+dimj;
 
  288             for (
long j=0; j<dimj; ++j) {
 
  289                 c[j] += aik0*bk0[j] + aik1*bk1[j] + aik2*bk2[j] + aik3*bk3[j];
 
  292         for (
long k=dimk4; 
k<dimk; ++
k) {
 
  294             for (
long j=0; j<dimj; ++j) {
 
  295                 c[j] += aik*b[
k*dimj+j];
 
  303 inline void mTxmT(
long dimi, 
long dimj, 
long dimk,
 
  316     long dimj2 = (dimj/2)*2;
 
  318     for (
long klo=0; klo<dimk; klo+=ktile, asave+=ktile*dimi, b+=ktile) {
 
  319         long khi = klo+ktile;
 
  320         if (khi > dimk) khi = dimk;
 
  325         for (
long i=0; i<dimi; ++i,c+=dimj,++
a) {
 
  327             for (
long k=0; 
k<nk; ++
k,q+=dimi) ai[
k] = *q;
 
  329             const double* bj0 = 
b;
 
  330             for (
long j=0; j<dimj2; j+=2,bj0+=2*dimk) {
 
  331                 const double* bj1 = bj0+dimk;
 
  334                 for (
long k=0; 
k<nk; ++
k) {
 
  335                     sum0 += ai[
k]*bj0[
k];
 
  336                     sum1 += ai[
k]*bj1[
k];
 
  342             for (
long j=dimj2; j<dimj; ++j,bj0+=dimk) {
 
  344                 for (
long k=0; 
k<nk; ++
k) {
 
  353 #endif // MADNESS_TENSOR_MXM_H__INCLUDED 
const T1 &f1 return GTEST_2_TUPLE_() T(f0, f1)
const int k
Definition: dielectric.cc:184
FLOAT a(int j, FLOAT z)
Definition: y1.cc:86
void mxmT(long dimi, long dimj, long dimk, double *restrict c, const double *restrict a, const double *restrict b)
Matrix * matrix transpose (hand unrolled version) 
Definition: mxm.h:215
void mxm(long dimi, long dimj, long dimk, double *restrict c, const double *restrict a, const double *restrict b)
Matrix * matrix (hand unrolled version) 
Definition: mxm.h:264
void mTxm(long dimi, long dimj, long dimk, double *restrict c, const double *restrict a, const double *restrict b)
Matrix transpose * matrix (hand unrolled version) 
Definition: mxm.h:168
const mpreal sum(const mpreal tab[], unsigned long int n, mp_rnd_t rnd_mode)
Definition: mpreal.cc:241
#define restrict
Definition: config.h:403
const double c
Definition: gfit.cc:200
FLOAT b(int j, FLOAT z)
Definition: y1.cc:79
void mTxmT(long dimi, long dimj, long dimk, double *restrict csave, const double *restrict asave, const double *restrict b)
Matrix transpose * matrix transpose (hand tiled and unrolled) 
Definition: mxm.h:303