1 #ifndef MADNESS_DISTRIBUTED_MATRIX_H
2 #define MADNESS_DISTRIBUTED_MATRIX_H
58 class DistributedMatrixDistribution;
59 template <
typename T>
class DistributedMatrix;
61 static inline DistributedMatrixDistribution column_distributed_matrix_distribution(World& world, int64_t n, int64_t
m, int64_t coltile=0);
62 static inline DistributedMatrixDistribution row_distributed_matrix_distribution(World& world, int64_t n, int64_t
m, int64_t rowtile=0);
65 DistributedMatrix<T>
concatenate_rows(
const DistributedMatrix<T>&
a,
const DistributedMatrix<T>&
b);
68 DistributedMatrix<T>
interleave_rows(
const DistributedMatrix<T>&
a,
const DistributedMatrix<T>&
b);
113 , Pcoldim((n-1)/tilen+1)
114 , Prowdim((m-1)/tilem+1)
116 , Prow(rank - Pcol*Prowdim)
118 , ihi(
std::
min(ilo+tilen-1,n-1))
120 , jhi(
std::
min(jlo+tilem-1,m-1))
121 , idim(
std::
max(ihi-ilo+1,int64_t(0)))
122 , jdim(
std::
max(jhi-jlo+1,int64_t(0)))
124 if (ilo > ihi || jlo > jhi) {
157 pworld = (
World*)(0);
158 P = rank = n = m = tilen = tilem = Pcoldim = Prowdim = Pcol = Prow = ilo = ihi = jlo = jhi = idim = jdim = 0;
297 void get_range(
int p, int64_t& ilow, int64_t& ihigh, int64_t& jlow, int64_t& jhigh)
const {
389 template <
typename T>
390 class DistributedMatrix :
public DistributedMatrixDistribution {
391 friend DistributedMatrix<T> interleave_rows<T>(
const DistributedMatrix<T>&
a,
const DistributedMatrix<T>&
b);
392 friend DistributedMatrix<T> concatenate_rows<T>(
const DistributedMatrix<T>&
a,
const DistributedMatrix<T>&
b);
396 static T idij(
const int64_t i,
const int64_t j) {
return (i==j) ?
T(1) :
T(0);}
413 if (idim>0 && jdim>0) t = Tensor<T>(idim,jdim);
429 if (idim>0 && jdim>0) t = Tensor<T>(idim,jdim);
436 , t(deepcopy ?
copy(A.t) : A.t)
443 DistributedMatrixDistribution::operator=(A);
462 template <
typename funcT>
464 for (int64_t i=ilo; i<=ihi; i++) {
465 for (int64_t j=jlo; j<=jhi; j++) {
466 t(i-ilo,j-jlo) =
f(i,j);
513 const Tensor<T>&
data()
const {
return t;}
531 MADNESS_ASSERT(s.iscontiguous());
548 if (i0<=i1 && j0<=j1) {
549 t(
Slice(i0-ilo,i1-ilo),
Slice(j0-jlo,j1-jlo)) = s(
Slice(i0-ilow,i1-ilow),
Slice(j0-jlow,j1-jlow));
563 MADNESS_ASSERT(s.iscontiguous());
569 if (i0<=i1 && j0<=j1) {
570 t(
Slice(i0-ilo,i1-ilo),
Slice(j0-jlo,j1-jlo)) = s(
Slice(i0-ilow,i1-ilow),
Slice(j0-jlow,j1-jlow));
576 int newrowdim = jhigh - jlow + 1;
577 MADNESS_ASSERT(jlow >= 0);
578 MADNESS_ASSERT(jhigh <
rowdim());
579 MADNESS_ASSERT(newrowdim == U.rowdim());
580 MADNESS_ASSERT(
coldim() == U.coldim());
582 MADNESS_ASSERT(U.is_column_distributed());
583 MADNESS_ASSERT(
coltile() == U.coltile());
589 if (i0<=i1 && j0<=j1) {
590 U.data()(___) = t(
Slice(i0-ilo,i1-ilo),
Slice(j0-jlo,j1-jlo));
594 template <
typename R>
616 return copy(*
this)+=A;
630 void set(int64_t i, int64_t j,
const T x) {
631 MADNESS_ASSERT(i>=ilo && i<=ihi && j>=jlo && j<=jhi);
636 T get(int64_t i, int64_t j)
const {
637 MADNESS_ASSERT(i>=ilo && i<=ihi && j>=jlo && j<=jhi);
638 return t(i-ilo,j-jlo);
647 template <
typename T>
661 static inline DistributedMatrixDistribution
662 column_distributed_matrix_distribution(
World& world, int64_t n, int64_t
m, int64_t coltile) {
663 if (world.
size()*coltile < n) coltile = (n-1)/world.
size() + 1;
665 if ((coltile&0x1)) ++coltile;
679 template <
typename T>
692 static inline DistributedMatrixDistribution
693 row_distributed_matrix_distribution(
World& world, int64_t n, int64_t
m, int64_t rowtile) {
694 if (world.
size()*rowtile <
m) rowtile = (m-1)/world.
size() + 1;
707 template <
typename T>
724 template <
typename T>
725 DistributedMatrix<T>
interleave_rows(
const DistributedMatrix<T>& a,
const DistributedMatrix<T>&
b) {
726 MADNESS_ASSERT(a.rowdim()==b.rowdim() && a.coldim()==b.coldim() && a.coltile()==b.coltile() && a.rowtile()==b.rowtile());
728 DistributedMatrix<T>
c(a.get_world(), a.coldim()*2, a.rowdim(), a.coltile()*2, a.rowtile());
729 c.data()(Slice(0,-1,2),_) = a.data()(___);
730 c.data()(Slice(1,-1,2),_) = b.data()(___);
745 template <
typename T>
746 DistributedMatrix<T>
concatenate_rows(
const DistributedMatrix<T>& a,
const DistributedMatrix<T>& b) {
747 MADNESS_ASSERT(a.coldim()==b.coldim() && a.coltile()==b.coltile() && a.is_column_distributed() && b.is_column_distributed());
749 int64_t ma = a.rowdim();
750 int64_t mb = b.rowdim();
752 DistributedMatrix<T>
c(a.get_world(), a.coldim(), ma+mb, a.coltile(), ma+mb);
755 a.local_colrange(ilow, ihigh);
757 c.data()(_,Slice(0,ma-1)) = a.data()(___);
758 c.data()(_,Slice(ma,-1)) = b.data()(___);
776 template <
typename T>
777 DistributedMatrix<T>
concatenate_rows(
const DistributedMatrix<T>& a,
const DistributedMatrix<T>& b,
const DistributedMatrix<T>&
c,
const DistributedMatrix<T>& d) {
778 MADNESS_ASSERT(a.coldim()==b.coldim() && b.coldim()==c.coldim() && c.coldim()==d.coldim());
779 MADNESS_ASSERT(a.coltile()==b.coltile() && b.coltile()==c.coltile() && c.coltile()==d.coltile());
780 MADNESS_ASSERT(a.is_column_distributed() && b.is_column_distributed() && c.is_column_distributed() && d.is_column_distributed());
782 int64_t ma = a.rowdim();
783 int64_t mb = b.rowdim();
784 int64_t mc = c.rowdim();
785 int64_t md = d.rowdim();
787 DistributedMatrix<T> result(a.get_world(), a.coldim(), ma+mb+mc+md, a.coltile(), ma+mb+mc+md);
789 if(a.local_size() > 0) result.data()( _ , Slice(0,ma-1) ) = a.data()(___);
790 if(b.local_size() > 0) result.data()( _ , Slice(ma, ma+mb-1) ) = b.data()(___);
791 if(c.local_size() > 0) result.data()( _ , Slice(ma+mb, ma+mb+mc-1) ) = c.data()(___);
792 if(d.local_size() > 0) result.data()( _ , Slice(ma+mb+mc, -1) ) = d.data()(___);
809 template <
typename T>
810 DistributedMatrix<T>
concatenate_columns(
const DistributedMatrix<T>& a,
const DistributedMatrix<T>& b) {
811 MADNESS_ASSERT(a.rowdim()==b.rowdim() && a.rowtile()==b.rowtile() && a.is_row_distributed() && b.is_row_distributed());
813 int64_t ma = a.coldim();
814 int64_t mt = ma + b.coldim();
816 DistributedMatrix<T>
c(a.get_world(), mt, a.rowdim(), b.rowtile(), mt);
818 if(a.local_size() > 0) c.data()( Slice(0,ma-1), _ ) = a.data()(___);
819 if(a.local_size() > 0) c.data()( Slice(ma,-1), _ ) = b.data()(___);
int64_t local_rowdim() const
Returns the no. of row elements stored on this processor.
Definition: distributed_matrix.h:242
WorldGopInterface & gop
Global operations.
Definition: worldfwd.h:462
int64_t local_size() const
Returns the total no. of elements stored on this processor.
Definition: apps/ii/systolic.h:98
void copy_from_replicated_patch(int64_t ilow, int64_t ihigh, int64_t jlow, int64_t jhigh, const Tensor< T > &s)
Copy from replicated patch (inclusive index range) into the distributed matrix.
Definition: distributed_matrix.h:543
friend DistributedMatrixDistribution column_distributed_matrix_distribution(World &world, int64_t n, int64_t m, int64_t coltile)
Generates distribution for an (n,m) matrix distributed by columns (row dimension is not distributed) ...
Definition: distributed_matrix.h:662
bool is_column_distributed() const
Returns true if the matrix is column distributed (i.e., row dimension not distributed) ...
Definition: distributed_matrix.h:352
virtual ~DistributedMatrix()
Definition: distributed_matrix.h:449
World & get_world() const
Returns the associated world.
Definition: distributed_matrix.h:346
DistributedMatrix(World &world, int64_t n, int64_t m, int64_t coltile, int64_t rowtile)
Constructs a distributed matrix dimension (n,m) with specified tile sizes and initialized to zero...
Definition: distributed_matrix.h:410
int64_t rowdim() const
Returns the row dimension of the matrix ... i.e., m for A(n,m)
Definition: apps/ii/systolic.h:83
int64_t rowtile() const
Returns the row tile size.
Definition: distributed_matrix.h:210
void copy_to_replicated(Tensor< T > &s) const
Copy from the distributed (m,n) matrix into the replicated matrix (collective call) ...
Definition: distributed_matrix.h:530
World & get_world() const
Returns associated world.
Definition: apps/ii/systolic.h:152
const Tensor< T > & data() const
Returns const reference to data.
Definition: distributed_matrix.h:513
int64_t Pcoldim
Definition: distributed_matrix.h:83
Definition: mpreal.h:3066
DistributedMatrix(const DistributedMatrix< T > &A, bool deepcopy=false)
Copy constructor copies dimensions, distribution, and shallow copy of content (unless deepcopy=true) ...
Definition: distributed_matrix.h:434
void fill(const funcT &f)
Fills the matrix with the provided function of the indices.
Definition: distributed_matrix.h:463
int64_t jdim
Definition: distributed_matrix.h:89
int64_t Prow
Definition: distributed_matrix.h:86
DistributedMatrix< T > column_distributed_matrix(World &world, int64_t n, int64_t m, int64_t coltile=0)
Generates an (n,m) matrix distributed by columns (row dimension is not distributed) ...
Definition: apps/ii/systolic.h:201
void local_rowrange(int64_t &jlow, int64_t &jhigh) const
Returns the inclusive range of row indices on this processor.
Definition: distributed_matrix.h:260
NDIM & f
Definition: mra.h:2179
DistributedMatrixDistribution(World &world, int64_t n, int64_t m, int64_t coltile, int64_t rowtile)
Constructs distribution and size info for a matrix (for use by factory functions only) ...
Definition: distributed_matrix.h:105
bool operator==(const DistributedMatrixDistribution &d) const
Definition: distributed_matrix.h:161
Tensor< T > & data()
Returns reference to the local data.
Definition: distributed_matrix.h:498
ProcessID size() const
Returns the number of processes in this world (same as MPI_Comm_size())
Definition: worldfwd.h:533
int64_t process_rowdim() const
Returns the no. of processors in the row dimension.
Definition: distributed_matrix.h:224
void get_colrange(int p, int64_t &ilow, int64_t &ihigh) const
Returns the inclusive range of column indices on processor p.
Definition: distributed_matrix.h:321
int64_t coldim() const
Returns the column dimension of the matrix ... i.e., n for A(n,m)
Definition: apps/ii/systolic.h:80
int64_t n
Definition: distributed_matrix.h:79
World * pworld
Definition: distributed_matrix.h:76
This header should include pretty much everything needed for the parallel runtime.
void get_range(int p, int64_t &ilow, int64_t &ihigh, int64_t &jlow, int64_t &jhigh) const
Returns the inclusive ranges of column and row indicies on processor p.
Definition: distributed_matrix.h:297
DistributedMatrix< T > & operator=(const DistributedMatrix< T > &A)
Assigment copies dimensions, distribution, and shallow copy of content.
Definition: distributed_matrix.h:441
virtual ~DistributedMatrixDistribution()
Definition: distributed_matrix.h:373
int64_t tilen
Definition: distributed_matrix.h:81
Defines and implements most of Tensor.
int64_t local_jhigh() const
Returns the last row index on this processor (0 if no data present)
Definition: distributed_matrix.h:284
int64_t coltile() const
Returns the column tile size.
Definition: distributed_matrix.h:202
int64_t P
Definition: distributed_matrix.h:77
int64_t local_size() const
Returns the total no. of elements stored on this processor.
Definition: distributed_matrix.h:230
const T1 &f1 return GTEST_2_TUPLE_() T(f0, f1)
#define max(a, b)
Definition: lda.h:53
Definition: distributed_matrix.h:70
friend DistributedMatrixDistribution row_distributed_matrix_distribution(World &world, int64_t n, int64_t m, int64_t rowtile)
Generates an (n,m) matrix distribution distributed by rows (column dimension is not distributed) ...
Definition: distributed_matrix.h:693
const double pi
Definition: navstokes_cosines.cc:91
Function< T, NDIM > copy(const Function< T, NDIM > &f, const std::shared_ptr< WorldDCPmapInterface< Key< NDIM > > > &pmap, bool fence=true)
Create a new copy of the function with different distribution and optional fence. ...
Definition: mra.h:1835
const mpreal min(const mpreal &x, const mpreal &y)
Definition: mpreal.h:2675
FLOAT a(int j, FLOAT z)
Definition: y1.cc:86
void copy_to_replicated_patch(int64_t ilow, int64_t ihigh, int64_t jlow, int64_t jhigh, Tensor< T > &s) const
Copy from distributed matrix into replicated patch (inclusive index range; collective call) ...
Definition: distributed_matrix.h:562
int64_t ihi
Definition: distributed_matrix.h:87
DistributedMatrix< T > operator+(const DistributedMatrix< T > &A) const
Out of place addition — dimensions and distribution must be identical.
Definition: distributed_matrix.h:614
int64_t local_ilow() const
Returns the first column index on this processor (0 if no data present)
Definition: distributed_matrix.h:267
void sum(T *buf, size_t nelem)
Inplace global sum while still processing AM & tasks.
Definition: worldgop.h:767
void set(int64_t i, int64_t j, const T x)
Sets element (i,j) to v if (i,j) is local, otherwise throws MadnessException.
Definition: distributed_matrix.h:630
A parallel world with full functionality wrapping an MPI communicator.
Definition: worldfwd.h:416
int64_t coldim() const
Returns the column dimension of the matrix ... i.e., n for A(n,m)
Definition: distributed_matrix.h:186
void fill(T value)
Fills the matrix with a scalar.
Definition: distributed_matrix.h:475
bool is_row_distributed() const
Returns true if the matrix is row distributed (i.e., column dimension not distributed) ...
Definition: distributed_matrix.h:358
int ProcessID
Used to clearly identify process number/rank.
Definition: worldtypes.h:37
void copy_from_replicated(const Tensor< T > &s)
Copy from the replicated (m,n) matrix into the distributed matrix.
Definition: distributed_matrix.h:519
int64_t idim
Definition: distributed_matrix.h:89
int64_t local_coldim() const
Returns the no. of column elements stored on this processor.
Definition: distributed_matrix.h:236
void local_colrange(int64_t &ilow, int64_t &ihigh) const
Returns the inclusive range of column indices on this processor.
Definition: distributed_matrix.h:249
const double m
Definition: gfit.cc:199
void extract_columns(int64_t jlow, int64_t jhigh, DistributedMatrix< T > &U) const
Definition: distributed_matrix.h:575
const DistributedMatrixDistribution & distribution() const
Returns the distribution (aka *this)
Definition: distributed_matrix.h:362
ProcessID rank
Definition: distributed_matrix.h:78
DistributedMatrix< T > concatenate_rows(const DistributedMatrix< T > &a, const DistributedMatrix< T > &b)
Generates a distributed matrix with rows of a and b concatenated.
Definition: apps/ii/systolic.h:251
int64_t local_jlow() const
Returns the first row index on this processor (0 if no data present)
Definition: distributed_matrix.h:278
int64_t local_ihigh() const
Returns the last column index on this processor (-1 if no data present)
Definition: distributed_matrix.h:272
void clear()
Frees memory and resets state to same as default constructor.
Definition: distributed_matrix.h:453
DistributedMatrix< T > & operator*=(const T s)
Inplace scale by a constant.
Definition: distributed_matrix.h:624
bool has_same_dimension_and_distribution(const DistributedMatrix< R > &A)
Definition: distributed_matrix.h:595
int64_t rowdim() const
Returns the row dimension of the matrix ... i.e., m for A(n,m)
Definition: distributed_matrix.h:194
int64_t jhi
Definition: distributed_matrix.h:88
DistributedMatrix< T > concatenate_columns(const DistributedMatrix< T > &a, const DistributedMatrix< T > &b)
Generates a row-distributed matrix with rows of a and b contatenated.
Definition: apps/ii/systolic.h:288
DistributedMatrix< T > row_distributed_matrix(World &world, int64_t n, int64_t m, int64_t rowtile=0)
Generates an (n,m) matrix distributed by rows (column dimension is not distributed) ...
Definition: apps/ii/systolic.h:216
int64_t ilo
Definition: distributed_matrix.h:87
void get_rowrange(int p, int64_t &jlow, int64_t &jhigh) const
Returns the inclusive range of row indices on processor p.
Definition: distributed_matrix.h:335
DistributedMatrix< T > & operator+=(const DistributedMatrix< T > &A)
Inplace addition — dimensions and distribution must be identical.
Definition: distributed_matrix.h:603
bool is_column_distributed() const
Returns true if the matrix is column distributed (i.e., row dimension not distributed) ...
Definition: apps/ii/systolic.h:161
int64_t coltile() const
Returns the column tile size.
Definition: apps/ii/systolic.h:86
A slice defines a sub-range or patch of a dimension.
Definition: slice.h:103
int64_t tilem
Definition: distributed_matrix.h:82
DistributedMatrix(const DistributedMatrixDistribution &d)
Constructs a distributed matrix with given distribution info.
Definition: distributed_matrix.h:426
DistributedMatrixDistribution()
Default constructor makes an invalid distribution.
Definition: distributed_matrix.h:134
int64_t Prowdim
Definition: distributed_matrix.h:84
int64_t m
Definition: distributed_matrix.h:80
Holds machinery to set up Functions/FuncImpls using various Factories and Interfaces.
Definition: chem/atomutil.cc:45
ProcessID owner(int64_t i, int64_t j) const
Returns the number of the process that owns element (i,j)
Definition: distributed_matrix.h:366
void clear()
Resets state to same as default constructor.
Definition: distributed_matrix.h:156
const double c
Definition: gfit.cc:200
FLOAT b(int j, FLOAT z)
Definition: y1.cc:79
int64_t jlo
Definition: distributed_matrix.h:88
int64_t process_coldim() const
Returns the no. of processors in the column dimension.
Definition: distributed_matrix.h:218
DistributedMatrix< T > interleave_rows(const DistributedMatrix< T > &a, const DistributedMatrix< T > &b)
Generates a distributed matrix with rows of a and b interleaved.
Definition: apps/ii/systolic.h:233
void fill_identity()
Definition: distributed_matrix.h:480
DistributedMatrix()
Default constructor makes an empty matrix that cannot be used except as a target for assignemnt...
Definition: distributed_matrix.h:419
int64_t Pcol
Definition: distributed_matrix.h:85
int64_t rowtile() const
Returns the row tile size.
Definition: apps/ii/systolic.h:89
Manages data associated with a row/column/block distributed array.
Definition: apps/ii/systolic.h:51