33 #ifndef MADNESS_TENSOR_MTXMQ_H__INCLUDED
34 #define MADNESS_TENSOR_MTXMQ_H__INCLUDED
/// Reference kernel for matrix-transpose times matrix: c(i,j) = sum_k a(k,i) * b(k,j).
///
/// All three matrices are dense, contiguous, and row-major:
///   a is dimk x dimi  (column i of a is walked with stride dimi),
///   b is dimk x dimj,
///   c is dimi x dimj  (fully overwritten — no accumulation into prior contents).
///
/// @param dimi  rows of the result c (= columns of a)
/// @param dimj  columns of the result c (= columns of b)
/// @param dimk  contraction length (= rows of a and of b)
/// @param c     output buffer of dimi*dimj elements; must not alias a or b
/// @param a     left operand, dimk*dimi elements, read transposed
/// @param b     right operand, dimk*dimj elements
///
/// NOTE(review): the project header qualifies c with its `restrict` macro
/// (defined in config.h); it is omitted here because plain ISO C++ has no
/// `restrict` keyword — callers are unaffected by the qualifier.
template <typename aT, typename bT, typename cT>
void mTxmq(long dimi, long dimj, long dimk,
           cT* c, const aT* a, const bT* b) {
    // i indexes output rows; c advances one row, a advances one column per i.
    for (long i = 0; i < dimi; ++i, c += dimj, ++a) {
        // Clear row i of c, then accumulate one rank-1 update per k.
        for (long j = 0; j < dimj; ++j) c[j] = 0.0;
        const aT* aik_ptr = a;                 // walks column i of a (stride dimi)
        for (long k = 0; k < dimk; ++k, aik_ptr += dimi) {
            const aT aki = *aik_ptr;           // a(k,i)
            for (long j = 0; j < dimj; ++j) {
                c[j] += aki * b[k*dimj + j];
            }
        }
    }
}
/// Padded variant of mTxmq for kernels that require the j (fastest) dimension
/// to be a multiple of 4: c(i,j) = sum_k a(k,i) * b(k*ext_b + j).
///
/// b has a row stride of ext_b elements (ext_b >= dimj); a and c are
/// contiguous as in mTxmq.  When dimj or ext_b is not a multiple of 4 the
/// affected operand is staged through a temporary buffer whose row length
/// effj is dimj rounded up to the next multiple of 4, the aligned inner
/// kernel runs on the staged data, and the result is copied back.
///
/// @param dimi   rows of the result c (= columns of a)
/// @param dimj   logical columns of the result c
/// @param dimk   contraction length
/// @param ext_b  row stride of b in elements (>= dimj)
/// @param c      output, dimi*dimj contiguous elements; must not alias a or b
/// @param a      left operand, dimk*dimi elements, read transposed
/// @param b      right operand, dimk rows of ext_b elements each
template <typename aT, typename bT, typename cT>
void mTxmq_padding(long dimi, long dimj, long dimk, long ext_b,
                   cT* c, const aT* a, const bT* b) {
    const int alignment = 4;
    bool free_b = false;
    long effj = dimj;
    cT* c_buf = c;                       // compute directly into c when aligned

    // Stage c through a padded scratch buffer if dimj is unaligned.
    if (dimj % alignment) {
        effj = (dimj | 3) + 1;           // round dimj up to a multiple of 4
        c_buf = (cT*)malloc(sizeof(cT)*dimi*effj);
    }

    // Repack b into a contiguous buffer with aligned row stride effj.
    if (ext_b % alignment) {
        bT* b_buf = (bT*)malloc(sizeof(bT)*dimk*effj);
        bT* bp = b_buf;
        for (long k = 0; k < dimk; ++k, bp += effj, b += ext_b)
            memcpy(bp, b, sizeof(bT)*dimj);
        b = b_buf;                       // inner loops read the packed copy
        ext_b = effj;
        free_b = true;
    }

    // Reference mTxmq inner loops, writing rows of stride effj.
    cT* c_work = c_buf;
    const aT* acol = a;
    for (long i = 0; i < dimi; ++i, c_work += effj, ++acol) {
        for (long j = 0; j < dimj; ++j) c_work[j] = 0.0;
        const aT* aik_ptr = acol;        // walks column i of a (stride dimi)
        for (long k = 0; k < dimk; ++k, aik_ptr += dimi) {
            const aT aki = *aik_ptr;     // a(k,i)
            for (long j = 0; j < dimj; ++j) {
                c_work[j] += aki * b[k*ext_b + j];
            }
        }
    }

    // Copy the padded result back into the caller's contiguous c.
    if (dimj % alignment) {
        const cT* ct = c_buf;
        for (long i = 0; i < dimi; ++i, ct += effj, c += dimj)
            memcpy(c, ct, sizeof(cT)*dimj);
        free(c_buf);
    }
    if (free_b) free((bT*)b);            // b was repointed at the packed copy
}
// NOTE(review): Doxygen-extracted fragments of four declarations of mTxmq for
// the double / __complex__ double argument combinations (presumably explicit
// specializations for the BGQ build — cf. bgq_mtxm.cc in the cross-reference
// index).  The leading "template <> void mTxmq(long dimi, long dimj, long
// dimk, ..." portions fall on source lines the extractor omitted, so the
// fragments below are preserved verbatim — do not edit without the original
// mtxmq.h at hand.
// (real double in, real double out)
127 double*
c,
const double*
a,
const double*
b);
// (complex in, complex out)
129 __complex__
double*
c,
const __complex__
double*
a,
const __complex__
double*
b);
// (real a, complex b -> complex c)
131 __complex__
double*
c,
const double*
a,
const __complex__
double*
b);
// (complex a, real b -> complex c)
133 __complex__
double*
c,
const __complex__
double*
a,
const double*
b);
// NOTE(review): four inline mTxmq_padding overload headers (double /
// __complex__ double combinations).  Their bodies — presumably one-line
// forwards to bgq_mtxmq_padded(ni, nj, nk, ej, c, a, b), cf. bgq_mtxm.cc in
// the cross-reference index; TODO confirm against the original header — sit
// on source lines the extractor omitted, so only the signatures survive
// below.  Kept verbatim.
// (real, real)
136 inline void mTxmq_padding(
long ni,
long nj,
long nk,
long ej,
137 double*
c,
const double*
a,
const double*
b) {
// (complex, complex)
142 inline void mTxmq_padding(
long ni,
long nj,
long nk,
long ej,
143 __complex__
double*
c,
const __complex__
double*
a,
const __complex__
double*
b) {
// (real a, complex b)
148 inline void mTxmq_padding(
long ni,
long nj,
long nk,
long ej,
149 __complex__
double*
c,
const double*
a,
const __complex__
double*
b) {
// (complex a, real b)
154 inline void mTxmq_padding(
long ni,
long nj,
long nk,
long ej,
155 __complex__
double*
c,
const __complex__
double*
a,
const double*
b) {
// NOTE(review): platform-dispatch tail of the header, heavily fragmented by
// extraction.  HAVE_IBMBGP branch: extern bgpmTxmq plus inline mTxmq
// forwarders that call it (the second forwarder's signature — presumably the
// complex overload — is on omitted lines).  X86_64 / X86_32 branches: bare
// prototypes of assembly-backed mTxmq kernels; `restrict` is the project
// macro from config.h.  All fragments preserved verbatim — do not edit
// without the original mtxmq.h.
158 #elif defined(HAVE_IBMBGP)
159 extern void bgpmTxmq(
long ni,
long nj,
long nk,
double*
restrict c,
160 const double*
a,
const double*
b);
// Forwarder: real-double mTxmq delegates to the BGP assembly kernel.
165 inline void mTxmq(
long ni,
long nj,
long nk,
double*
restrict c,
const double*
a,
const double*
b) {
166 bgpmTxmq(ni, nj, nk, c, a, b);
// Body of a second forwarder whose signature is on omitted lines.
171 bgpmTxmq(ni, nj, nk, c, a, b);
174 #elif defined(X86_64) && !defined(DISABLE_SSE3)
// SSE3 assembly kernel prototypes (parameter fragments only survive).
176 void mTxmq(
long dimi,
long dimj,
long dimk,
177 double*
restrict c,
const double*
a,
const double*
b);
180 void mTxmq(
long dimi,
long dimj,
long dimk,
183 #ifndef __INTEL_COMPILER
185 void mTxmq(
long dimi,
long dimj,
long dimk,
189 #elif defined(X86_32)
// 32-bit x86 assembly kernel prototype.
191 void mTxmq(
long dimi,
long dimj,
long dimk,
192 double*
restrict c,
const double*
a,
const double*
b);
197 #endif // MADNESS_TENSOR_MTXMQ_H__INCLUDED
std::complex< double > double_complex
Definition: lineplot.cc:16
void bgq_mtxmq_padded(long dimi, long dimj, long dimk, long extb, __complex__ double *c_x, const __complex__ double *a_x, const __complex__ double *b_x)
Definition: bgq_mtxm.cc:10
void mTxmq_padding(long dimi, long dimj, long dimk, long ext_b, cT *c, const aT *a, const bT *b)
Definition: mtxmq.h:74
std::complex< double > double_complex
Definition: mtxmq.h:38
const int k
Definition: dielectric.cc:184
FLOAT a(int j, FLOAT z)
Definition: y1.cc:86
void mTxmq(long dimi, long dimj, long dimk, cT *restrict c, const aT *a, const bT *b)
Definition: mtxmq.h:50
#define restrict
Definition: config.h:403
Holds machinery to set up Functions/FuncImpls using various Factories and Interfaces.
Definition: chem/atomutil.cc:45
const double c
Definition: gfit.cc:200
FLOAT b(int j, FLOAT z)
Definition: y1.cc:79