38 #ifdef MADNESS_HAS_ELEMENTAL
48 #include <elemental.hpp>
/// One (row, column, value) triple shipped between the MADNESS and
/// Elemental process grids by BinSorter during matrix redistribution.
template <typename T>
struct Value {
    int i;  ///< global row index
    int j;  ///< global column index
    T t;    ///< matrix element at (i,j)

    // NOTE(review): default ctor reconstructed — BinSorter/serialization
    // need a default-constructible payload; confirm member defaults.
    Value() : i(-1), j(-1), t() {}

    Value(int i, int j, T t) : i(i), j(j), t(t) {}

    /// Serialize all three fields (MADNESS archive convention).
    template <typename Archive>
    void serialize(Archive& ar) {
        ar & i & j & t;
    }
};
68 class MadToElemDistCopy {
69 elem::DistMatrix<T>& d;
71 MadToElemDistCopy(elem::DistMatrix<T>& d) : d(d) {}
73 void operator()(
const Value<T>& v) {
79 class ElemToMadDistCopy {
80 DistributedMatrix<T>& d;
82 ElemToMadDistCopy(DistributedMatrix<T>& d) : d(d) {}
84 void operator()(
const Value<T>& v) {
94 ProcessID Owner(
const elem::DistMatrix<T>& d,
int i,
int j) {
95 int RowOwner = (i+d.ColAlignment()) % d.ColStride();
96 int ColOwner = (j+d.RowAlignment()) % d.RowStride();
97 return RowOwner+ColOwner*d.ColStride();
104 template <
typename T>
105 void copy_to_elemental(
const DistributedMatrix<T>& din, elem::DistMatrix<T>& dout) {
106 BinSorter< detail::Value<T> , detail::MadToElemDistCopy<T> > s(din.get_world(), detail::MadToElemDistCopy<T>(dout));
108 int64_t ilo, ihi, jlo, jhi;
109 din.local_colrange(ilo,ihi);
110 din.local_rowrange(jlo,jhi);
111 const Tensor<T>& t = din.data();
112 for (int64_t i=ilo; i<=ihi; i++) {
113 for (int64_t j=jlo; j<=jhi; j++) {
115 s.insert(owner, detail::Value<T>(i,j,t(i-ilo, j-jlo)));
125 template <
typename T>
126 void copy_from_elemental(
const elem::DistMatrix<T>& din, DistributedMatrix<T>& dout) {
127 BinSorter< detail::Value<T> , detail::ElemToMadDistCopy<T> > s(dout.get_world(), detail::ElemToMadDistCopy<T>(dout));
129 const int64_t colShift = din.ColShift();
130 const int64_t rowShift = din.RowShift();
131 const int64_t colStride = din.ColStride();
132 const int64_t rowStride = din.RowStride();
133 const int64_t localHeight = din.LocalHeight();
134 const int64_t localWidth = din.LocalWidth();
136 for( int64_t jLocal=0; jLocal<localWidth; ++jLocal ) {
137 for( int64_t iLocal=0; iLocal<localHeight; ++iLocal ) {
138 const int64_t i = colShift + iLocal*colStride;
139 const int64_t j = rowShift + jLocal*rowStride;
142 s.insert(owner, detail::Value<T>(i,j,din.GetLocal(iLocal,jLocal)));
163 template <
typename T>
164 void sygv(
const DistributedMatrix<T>& A,
165 const DistributedMatrix<T>&
B,
167 DistributedMatrix<T>& X,
168 Tensor<
typename Tensor<T>::scalar_type >& e)
170 const int64_t n = A.coldim();
171 const int64_t
m = A.rowdim();
172 MADNESS_ASSERT(n==m);
173 MADNESS_ASSERT(n==B.coldim() && m==B.rowdim());
175 const int blocksize = 128;
176 const elem::Grid GG(A.get_world().mpi.comm().Get_mpi_comm() );
177 elem::SetBlocksize(blocksize);
179 elem::DistMatrix<T> EA(n,n,GG);
180 elem::DistMatrix<T> EB(n,n,GG);
182 copy_to_elemental(A,EA);
183 copy_to_elemental(B,EB);
185 elem::HermitianGenDefiniteEigType eigType = elem::AXBX;
186 elem::UpperOrLower uplo = elem::CharToUpperOrLower(
'U');
187 elem::DistMatrix<T> Xd(n,n,GG);
188 elem::DistMatrix<T,elem::VR,elem::STAR> wd( n, n, GG);
194 elem::HermitianGenDefiniteEig(eigType, uplo, EA, EB, wd, Xd);
195 elem::hermitian_eig::Sort(wd, Xd);
197 A.get_world().mpi.Barrier();
199 X = DistributedMatrix<T>(A.distribution());
200 e = Tensor<typename Tensor<T>::scalar_type>(n);
202 copy_from_elemental(Xd, X);
204 const int localHeight1 = wd.LocalHeight();
205 const int colShift1 = wd.ColShift();
206 const int colStride1= wd.ColStride();
207 T * buffer = e.ptr();
208 for(
int iLocal=0; iLocal<localHeight1; ++iLocal ) {
210 const int i = colShift1 + iLocal*colStride1;
212 buffer[i]= wd.GetLocal( iLocal, jLocal);
215 A.get_world().gop.sum(e.ptr(),n);
232 template <
typename T>
233 void sygvp(World& world,
234 const Tensor<T>&
a,
const Tensor<T>& B,
int itype,
235 Tensor<T>&
V, Tensor<
typename Tensor<T>::scalar_type >& e) {
236 TENSOR_ASSERT(a.ndim() == 2,
"sygv requires a matrix",a.ndim(),&
a);
237 TENSOR_ASSERT(a.dim(0) == a.dim(1),
"sygv requires square matrix",0,&
a);
238 TENSOR_ASSERT(B.ndim() == 2,
"sygv requires a matrix",B.ndim(),&
a);
239 TENSOR_ASSERT(B.dim(0) == B.dim(1),
"sygv requires square matrix",0,&
a);
240 TENSOR_ASSERT(a.iscontiguous(),
"sygvp requires a contiguous matrix (a)",0,&
a);
241 TENSOR_ASSERT(B.iscontiguous(),
"sygvp requires a contiguous matrix (B)",0,&
B);
243 world.gop.broadcast(a.ptr(), a.size(), 0);
244 world.gop.broadcast(B.ptr(), B.size(), 0);
246 const int n = a.dim(1);
248 e = Tensor<typename Tensor<T>::scalar_type>(n);
253 sygv(a, B, itype, V, e);
259 const int blocksize = 128;
261 const elem::Grid GG( world.mpi.comm().Get_mpi_comm() );
262 elem::SetBlocksize(blocksize);
264 elem::DistMatrix<T> gd( n, n, GG );
266 const int colShift = gd.ColShift();
267 const int rowShift = gd.RowShift();
268 const int colStride =gd.ColStride();
269 const int rowStride = gd.RowStride();
270 const int localHeight = gd.LocalHeight();
271 const int localWidth = gd.LocalWidth();
273 const T * buffer = a.ptr();
274 for(
int jLocal=0; jLocal<localWidth; ++jLocal )
276 for(
int iLocal=0; iLocal<localHeight; ++iLocal )
278 const int i = colShift + iLocal*colStride;
279 const int j = rowShift + jLocal*rowStride;
281 gd.SetLocal( iLocal, jLocal, buffer[i+j*n] );
287 elem::DistMatrix<T> hd( n, n, GG );
289 const T * buffer = B.ptr();
290 for(
int jLocal=0; jLocal<localWidth; ++jLocal )
292 for(
int iLocal=0; iLocal<localHeight; ++iLocal )
294 const int i = colShift + iLocal*colStride;
295 const int j = rowShift + jLocal*rowStride;
297 hd.SetLocal( iLocal, jLocal, buffer[i+j*(n)] );
305 elem::HermitianGenDefiniteEigType eigType = elem::AXBX;
307 elem::UpperOrLower uplo = elem::CharToUpperOrLower(
'U');
308 elem::DistMatrix<T> Xd( n, n, GG );
309 elem::DistMatrix<T,elem::VR,elem::STAR> wd( n, n, GG);
316 elem::HermitianGenDefiniteEig( eigType, uplo, gd, hd, wd, Xd);
317 elem::hermitian_eig::Sort( wd, Xd );
324 const int localHeight1 = wd.LocalHeight();
325 const int colShift1 = wd.ColShift();
326 const int colStride1 =wd.ColStride();
327 T * buffer = e.ptr();
328 for(
int iLocal=0; iLocal<localHeight1; ++iLocal )
331 const int i = colShift1 + iLocal*colStride1;
333 buffer[i]= wd.GetLocal( iLocal, jLocal);
337 world.gop.sum(e.ptr(),n);
341 T * buffer = V.ptr();
342 for(
int jLocal=0; jLocal<localWidth; ++jLocal )
344 for(
int iLocal=0; iLocal<localHeight; ++iLocal )
346 const int i = colShift + iLocal*colStride;
347 const int j = rowShift + jLocal*rowStride;
349 buffer[i+j*n]= Xd.GetLocal( iLocal, jLocal);
353 world.gop.sum(V.ptr(), n*n);
358 catch (TensorException S) {
359 std::cerr << S << std::endl;
377 template <
typename T>
378 void gesvp(World& world,
const Tensor<T>& a,
const Tensor<T>&
b, Tensor<T>& x) {
382 int n = a.dim(0), m = a.dim(1), nrhs = b.dim(1);
385 TENSOR_ASSERT(b.ndim() <= 2,
"gesv require a vector or matrix for the RHS",b.ndim(),&
b);
386 TENSOR_ASSERT(a.dim(0) == b.dim(0),
"gesv matrix and RHS must conform",b.ndim(),&
b);
387 TENSOR_ASSERT(a.iscontiguous(),
"gesvp requires a contiguous matrix (a)",0,&
a);
388 TENSOR_ASSERT(b.iscontiguous(),
"gesvp requires a contiguous matrix (b)",0,&
b);
389 world.gop.broadcast(a.ptr(), a.size(), 0);
390 world.gop.broadcast(b.ptr(), b.size(), 0);
398 const elem::Grid GG( world.mpi.comm().Get_mpi_comm() );
399 elem::SetBlocksize(blocksize);
400 elem::DistMatrix<T> gd( n, n, GG );
404 const int colShift = gd.ColShift();
405 const int rowShift = gd.RowShift();
406 const int colStride =gd.ColStride();
407 const int rowStride = gd.RowStride();
408 const int localHeight = gd.LocalHeight();
409 const int localWidth = gd.LocalWidth();
411 const T * buffer = AT.ptr();
412 for(
int jLocal=0; jLocal<localWidth; ++jLocal )
414 for(
int iLocal=0; iLocal<localHeight; ++iLocal )
416 const int i = colShift + iLocal*colStride;
417 const int j = rowShift + jLocal*rowStride;
419 gd.SetLocal( iLocal, jLocal, buffer[i+j*n] );
432 x = Tensor<T>(n,nrhs);
436 for(
int i=0; i< x.size(); ++i) {
437 T * buffer = x.ptr() ;
443 elem::DistMatrix<T> hd( n, nrhs, GG );
445 const int colShift = hd.ColShift();
446 const int rowShift = hd.RowShift();
447 const int colStride =hd.ColStride();
448 const int rowStride = hd.RowStride();
449 const int localHeight = hd.LocalHeight();
450 const int localWidth = hd.LocalWidth();
452 const T * buffer = bT.ptr();
453 for(
int jLocal=0; jLocal<localWidth; ++jLocal )
455 for(
int iLocal=0; iLocal<localHeight; ++iLocal )
457 const int i = colShift + iLocal*colStride;
458 const int j = rowShift + jLocal*rowStride;
460 hd.SetLocal( iLocal, jLocal, buffer[i+j*(n)]);
468 elem::GaussianElimination(gd, hd);
472 const int colShift = hd.ColShift();
473 const int rowShift = hd.RowShift();
474 const int colStride =hd.ColStride();
475 const int rowStride = hd.RowStride();
476 const int localHeight = hd.LocalHeight();
477 const int localWidth = hd.LocalWidth();
479 T * buffer = x.ptr();
480 for(
int jLocal=0; jLocal<localWidth; ++jLocal )
482 for(
int iLocal=0; iLocal<localHeight; ++iLocal )
484 const int i = colShift + iLocal*colStride;
485 const int j = rowShift + jLocal*rowStride;
487 buffer[j+i*nrhs]= hd.GetLocal( iLocal, jLocal);
491 world.gop.sum(x.ptr(), n*nrhs);
496 catch (TensorException S) {
497 std::cerr << S << std::endl;
507 template <
typename T>
509 const Tensor<T>& a,
const Tensor<T>& B,
int itype,
510 Tensor<T>& V, Tensor<
typename Tensor<T>::scalar_type >& e) {
511 sygv(a, B, itype, V, e);
515 template <
typename T>
516 void gesvp(
World& world,
const Tensor<T>& a,
const Tensor<T>& b, Tensor<T>& x) {
521 #endif //MADNESS_HAS_ELEMENTAL
Tensor< double > B
Definition: tdse1d.cc:167
void gesv(const Tensor< T > &a, const Tensor< T > &b, Tensor< T > &x)
Solve Ax = b for general A using the LAPACK *gesv routines.
Definition: lapack.cc:339
#define TENSOR_ASSERT(condition, msg, value, t)
Definition: tensorexcept.h:130
double V(const Vector< double, 3 > &r)
Definition: apps/ii/hatom_energy.cc:46
This header should include pretty much everything needed for the parallel runtime.
Defines and implements most of Tensor.
void sygvp(World &world, const Tensor< T > &a, const Tensor< T > &B, int itype, Tensor< T > &V, Tensor< typename Tensor< T >::scalar_type > &e)
Definition: elem.h:508
const T1 &f1 return GTEST_2_TUPLE_() T(f0, f1)
Function< T, NDIM > copy(const Function< T, NDIM > &f, const std::shared_ptr< WorldDCPmapInterface< Key< NDIM > > > &pmap, bool fence=true)
Create a new copy of the function with different distribution and optional fence. ...
Definition: mra.h:1835
Prototypes for a partial interface from Tensor to LAPACK.
FLOAT a(int j, FLOAT z)
Definition: y1.cc:86
A parallel world with full functionality wrapping an MPI communicator.
Definition: worldfwd.h:416
enable_if_c< is_serializable< T >::value &&is_output_archive< Archive >::value >::type serialize(const Archive &ar, const T *t, unsigned int n)
Definition: archive.h:615
void gesvp(World &world, const Tensor< T > &a, const Tensor< T > &b, Tensor< T > &x)
Definition: elem.h:516
int ProcessID
Used to clearly identify process number/rank.
Definition: worldtypes.h:37
const double m
Definition: gfit.cc:199
Defines simple templates for printing to std::cout "a la Python".
void sygv(const Tensor< T > &A, const Tensor< T > &B, int itype, Tensor< T > &V, Tensor< typename Tensor< T >::scalar_type > &e)
Generalized real-symmetric or complex-Hermitian eigenproblem.
Definition: lapack.cc:519
Tensor< T > transpose(const Tensor< T > &t)
Returns a new deep copy of the transpose of the input tensor.
Definition: tensor.h:1945
Holds machinery to set up Functions/FuncImpls using various Factories and Interfaces.
Definition: chem/atomutil.cc:45
FLOAT b(int j, FLOAT z)
Definition: y1.cc:79