1 #ifndef MADNESS_DISTRIBUTED_MATRIX_H 
    2 #define MADNESS_DISTRIBUTED_MATRIX_H 
   58     class DistributedMatrixDistribution;
 
   59     template <
typename T> 
class DistributedMatrix;
 
   61     static inline DistributedMatrixDistribution column_distributed_matrix_distribution(World& world, int64_t n, int64_t 
m, int64_t coltile=0);
 
   62     static inline DistributedMatrixDistribution row_distributed_matrix_distribution(World& world, int64_t n, int64_t 
m, int64_t rowtile=0);
 
   65     DistributedMatrix<T> 
concatenate_rows(
const DistributedMatrix<T>& 
a, 
const DistributedMatrix<T>& 
b);
 
   68     DistributedMatrix<T> 
interleave_rows(
const DistributedMatrix<T>& 
a, 
const DistributedMatrix<T>& 
b);
 
  113             , Pcoldim((n-1)/tilen+1)
 
  114             , Prowdim((m-1)/tilem+1)
 
  116             , Prow(rank - Pcol*Prowdim)
 
  118             , ihi(
std::
min(ilo+tilen-1,n-1))
 
  120             , jhi(
std::
min(jlo+tilem-1,m-1))
 
  121             , idim(
std::
max(ihi-ilo+1,int64_t(0)))
 
  122             , jdim(
std::
max(jhi-jlo+1,int64_t(0)))
 
  124             if (ilo > ihi || jlo > jhi) {
 
  157             pworld = (
World*)(0);
 
  158             P = rank = n = m = tilen = tilem = Pcoldim = Prowdim = Pcol = Prow = ilo = ihi = jlo = jhi = idim = jdim = 0;
 
  297         void get_range(
int p, int64_t& ilow, int64_t& ihigh, int64_t& jlow, int64_t& jhigh)
 const {
 
  389     template <
typename T>
 
  390     class DistributedMatrix : 
public DistributedMatrixDistribution {
 
  391         friend DistributedMatrix<T> interleave_rows<T>(
const DistributedMatrix<T>& 
a, 
const DistributedMatrix<T>& 
b);
 
  392         friend DistributedMatrix<T> concatenate_rows<T>(
const DistributedMatrix<T>& 
a, 
const DistributedMatrix<T>& 
b);
 
  396         static T idij(
const int64_t i, 
const int64_t j) {
return (i==j) ?  
T(1) : 
T(0);}
 
  413             if (idim>0 && jdim>0) t = Tensor<T>(idim,jdim);
 
  429             if (idim>0 && jdim>0) t = Tensor<T>(idim,jdim);
 
  436             , t(deepcopy ? 
copy(A.t) : A.t) 
 
  443                 DistributedMatrixDistribution::operator=(A);
 
  462         template <
typename funcT>
 
  464             for (int64_t i=ilo; i<=ihi; i++) {
 
  465                 for (int64_t j=jlo; j<=jhi; j++) {
 
  466                     t(i-ilo,j-jlo) = 
f(i,j);
 
  513         const Tensor<T>& 
data()
 const {
return t;}
 
  531             MADNESS_ASSERT(s.iscontiguous());
 
  548             if (i0<=i1 && j0<=j1) {
 
  549                 t(
Slice(i0-ilo,i1-ilo),
Slice(j0-jlo,j1-jlo)) = s(
Slice(i0-ilow,i1-ilow),
Slice(j0-jlow,j1-jlow));
 
  563             MADNESS_ASSERT(s.iscontiguous());
 
  569             if (i0<=i1 && j0<=j1) {
 
  570                 t(
Slice(i0-ilo,i1-ilo),
Slice(j0-jlo,j1-jlo)) = s(
Slice(i0-ilow,i1-ilow),
Slice(j0-jlow,j1-jlow));
 
  576             int newrowdim = jhigh - jlow + 1;
 
  577             MADNESS_ASSERT(jlow >= 0);
 
  578             MADNESS_ASSERT(jhigh < 
rowdim());
 
  579             MADNESS_ASSERT(newrowdim == U.rowdim());
 
  580             MADNESS_ASSERT(
coldim() == U.coldim());
 
  582             MADNESS_ASSERT(U.is_column_distributed());
 
  583             MADNESS_ASSERT(
coltile() == U.coltile());
 
  589             if (i0<=i1 && j0<=j1) {
 
  590                 U.data()(___) = t(
Slice(i0-ilo,i1-ilo),
Slice(j0-jlo,j1-jlo));
 
  594         template <
typename R> 
 
  616             return copy(*
this)+=A;
 
  630         void set(int64_t i, int64_t j, 
const T x) {
 
  631             MADNESS_ASSERT(i>=ilo && i<=ihi && j>=jlo && j<=jhi);
 
  636         T get(int64_t i, int64_t j) 
const {
 
  637             MADNESS_ASSERT(i>=ilo && i<=ihi && j>=jlo && j<=jhi);
 
  638             return t(i-ilo,j-jlo);
 
  647     template <
typename T>
 
  661     static inline DistributedMatrixDistribution
 
  662     column_distributed_matrix_distribution(
World& world, int64_t n, int64_t 
m, int64_t coltile) { 
 
  663         if (world.
size()*coltile < n) coltile = (n-1)/world.
size() + 1;
 
  665         if ((coltile&0x1)) ++coltile; 
 
  679     template <
typename T>
 
  692     static inline DistributedMatrixDistribution
 
  693     row_distributed_matrix_distribution(
World& world, int64_t n, int64_t 
m, int64_t rowtile) { 
 
  694         if (world.
size()*rowtile < 
m) rowtile = (m-1)/world.
size() + 1;
 
  707     template <
typename T>
 
  724     template <
typename T>
 
  725     DistributedMatrix<T> 
interleave_rows(
const DistributedMatrix<T>& a, 
const DistributedMatrix<T>& 
b) {
 
  726         MADNESS_ASSERT(a.rowdim()==b.rowdim() && a.coldim()==b.coldim() && a.coltile()==b.coltile() && a.rowtile()==b.rowtile());
 
  728         DistributedMatrix<T> 
c(a.get_world(), a.coldim()*2, a.rowdim(), a.coltile()*2, a.rowtile());
 
  729         c.data()(Slice(0,-1,2),_) = a.data()(___);
 
  730         c.data()(Slice(1,-1,2),_) = b.data()(___);
 
  745     template <
typename T>
 
  746     DistributedMatrix<T> 
concatenate_rows(
const DistributedMatrix<T>& a, 
const DistributedMatrix<T>& b) {
 
  747         MADNESS_ASSERT(a.coldim()==b.coldim() && a.coltile()==b.coltile() && a.is_column_distributed() && b.is_column_distributed());
 
  749         int64_t ma = a.rowdim();
 
  750         int64_t mb = b.rowdim();
 
  752         DistributedMatrix<T> 
c(a.get_world(), a.coldim(), ma+mb, a.coltile(), ma+mb);
 
  755         a.local_colrange(ilow, ihigh);
 
  757             c.data()(_,Slice(0,ma-1)) = a.data()(___);
 
  758             c.data()(_,Slice(ma,-1))  = b.data()(___);
 
  776     template <
typename T>
 
  777     DistributedMatrix<T> 
concatenate_rows( 
const DistributedMatrix<T>& a, 
const DistributedMatrix<T>& b, 
const DistributedMatrix<T>& 
c, 
const DistributedMatrix<T>& d) {
 
  778         MADNESS_ASSERT(a.coldim()==b.coldim() && b.coldim()==c.coldim() && c.coldim()==d.coldim());
 
  779         MADNESS_ASSERT(a.coltile()==b.coltile() && b.coltile()==c.coltile() && c.coltile()==d.coltile());
 
  780         MADNESS_ASSERT(a.is_column_distributed() && b.is_column_distributed() && c.is_column_distributed() && d.is_column_distributed());
 
  782         int64_t ma = a.rowdim();
 
  783         int64_t mb = b.rowdim();
 
  784         int64_t mc = c.rowdim();
 
  785         int64_t md = d.rowdim();
 
  787         DistributedMatrix<T> result(a.get_world(), a.coldim(), ma+mb+mc+md, a.coltile(), ma+mb+mc+md);
 
  789         if(a.local_size() > 0) result.data()( _ , Slice(0,ma-1) ) = a.data()(___);
 
  790         if(b.local_size() > 0) result.data()( _ , Slice(ma, ma+mb-1) ) = b.data()(___);
 
  791         if(c.local_size() > 0) result.data()( _ , Slice(ma+mb, ma+mb+mc-1) ) = c.data()(___);
 
  792         if(d.local_size() > 0) result.data()( _ , Slice(ma+mb+mc, -1) ) = d.data()(___);
 
  809     template <
typename T>
 
  810     DistributedMatrix<T> 
concatenate_columns(
const DistributedMatrix<T>& a, 
const DistributedMatrix<T>& b) {
 
  811         MADNESS_ASSERT(a.rowdim()==b.rowdim() && a.rowtile()==b.rowtile() && a.is_row_distributed() && b.is_row_distributed());
 
  813         int64_t ma = a.coldim();
 
  814         int64_t mt = ma + b.coldim();
 
  816         DistributedMatrix<T> 
c(a.get_world(), mt, a.rowdim(), b.rowtile(), mt);
 
  818         if(a.local_size() > 0) c.data()( Slice(0,ma-1), _ ) = a.data()(___);
 
  819         if(a.local_size() > 0) c.data()( Slice(ma,-1), _ ) = b.data()(___);
 
int64_t local_rowdim() const 
Returns the no. of row elements stored on this processor. 
Definition: distributed_matrix.h:242
WorldGopInterface & gop
Global operations. 
Definition: worldfwd.h:462
int64_t local_size() const 
Returns the total no. of elements stored on this processor. 
Definition: apps/ii/systolic.h:98
void copy_from_replicated_patch(int64_t ilow, int64_t ihigh, int64_t jlow, int64_t jhigh, const Tensor< T > &s)
Copy from replicated patch (inclusive index range) into the distributed matrix. 
Definition: distributed_matrix.h:543
friend DistributedMatrixDistribution column_distributed_matrix_distribution(World &world, int64_t n, int64_t m, int64_t coltile)
Generates distribution for an (n,m) matrix distributed by columns (row dimension is not distributed) ...
Definition: distributed_matrix.h:662
bool is_column_distributed() const 
Returns true if the matrix is column distributed (i.e., row dimension not distributed) ...
Definition: distributed_matrix.h:352
virtual ~DistributedMatrix()
Definition: distributed_matrix.h:449
World & get_world() const 
Returns the associated world. 
Definition: distributed_matrix.h:346
DistributedMatrix(World &world, int64_t n, int64_t m, int64_t coltile, int64_t rowtile)
Constructs a distributed matrix dimension (n,m) with specified tile sizes and initialized to zero...
Definition: distributed_matrix.h:410
int64_t rowdim() const 
Returns the row dimension of the matrix ... i.e., m for A(n,m) 
Definition: apps/ii/systolic.h:83
int64_t rowtile() const 
Returns the row tile size. 
Definition: distributed_matrix.h:210
void copy_to_replicated(Tensor< T > &s) const 
Copy from the distributed (m,n) matrix into the replicated matrix (collective call) ...
Definition: distributed_matrix.h:530
World & get_world() const 
Returns associated world. 
Definition: apps/ii/systolic.h:152
const Tensor< T > & data() const 
Returns const reference to data. 
Definition: distributed_matrix.h:513
int64_t Pcoldim
Definition: distributed_matrix.h:83
Definition: mpreal.h:3066
DistributedMatrix(const DistributedMatrix< T > &A, bool deepcopy=false)
Copy constructor copies dimensions, distribution, and shallow copy of content (unless deepcopy=true) ...
Definition: distributed_matrix.h:434
void fill(const funcT &f)
Fills the matrix with the provided function of the indices. 
Definition: distributed_matrix.h:463
int64_t jdim
Definition: distributed_matrix.h:89
int64_t Prow
Definition: distributed_matrix.h:86
DistributedMatrix< T > column_distributed_matrix(World &world, int64_t n, int64_t m, int64_t coltile=0)
Generates an (n,m) matrix distributed by columns (row dimension is not distributed) ...
Definition: apps/ii/systolic.h:201
void local_rowrange(int64_t &jlow, int64_t &jhigh) const 
Returns the inclusive range of row indices on this processor. 
Definition: distributed_matrix.h:260
NDIM & f
Definition: mra.h:2179
DistributedMatrixDistribution(World &world, int64_t n, int64_t m, int64_t coltile, int64_t rowtile)
Constructs distribution and size info for a matrix (for use by factory functions only) ...
Definition: distributed_matrix.h:105
bool operator==(const DistributedMatrixDistribution &d) const 
Definition: distributed_matrix.h:161
Tensor< T > & data()
Returns reference to the local data. 
Definition: distributed_matrix.h:498
ProcessID size() const 
Returns the number of processes in this world (same as MPI_Comm_size()) 
Definition: worldfwd.h:533
int64_t process_rowdim() const 
Returns the no. of processors in the row dimension. 
Definition: distributed_matrix.h:224
void get_colrange(int p, int64_t &ilow, int64_t &ihigh) const 
Returns the inclusive range of column indices on processor p. 
Definition: distributed_matrix.h:321
int64_t coldim() const 
Returns the column dimension of the matrix ... i.e., n for A(n,m) 
Definition: apps/ii/systolic.h:80
int64_t n
Definition: distributed_matrix.h:79
World * pworld
Definition: distributed_matrix.h:76
This header should include pretty much everything needed for the parallel runtime. 
void get_range(int p, int64_t &ilow, int64_t &ihigh, int64_t &jlow, int64_t &jhigh) const 
Returns the inclusive ranges of column and row indicies on processor p. 
Definition: distributed_matrix.h:297
DistributedMatrix< T > & operator=(const DistributedMatrix< T > &A)
Assigment copies dimensions, distribution, and shallow copy of content. 
Definition: distributed_matrix.h:441
virtual ~DistributedMatrixDistribution()
Definition: distributed_matrix.h:373
int64_t tilen
Definition: distributed_matrix.h:81
Defines and implements most of Tensor. 
int64_t local_jhigh() const 
Returns the last row index on this processor (0 if no data present) 
Definition: distributed_matrix.h:284
int64_t coltile() const 
Returns the column tile size. 
Definition: distributed_matrix.h:202
int64_t P
Definition: distributed_matrix.h:77
int64_t local_size() const 
Returns the total no. of elements stored on this processor. 
Definition: distributed_matrix.h:230
const T1 &f1 return GTEST_2_TUPLE_() T(f0, f1)
#define max(a, b)
Definition: lda.h:53
Definition: distributed_matrix.h:70
friend DistributedMatrixDistribution row_distributed_matrix_distribution(World &world, int64_t n, int64_t m, int64_t rowtile)
Generates an (n,m) matrix distribution distributed by rows (column dimension is not distributed) ...
Definition: distributed_matrix.h:693
const double pi
Definition: navstokes_cosines.cc:91
Function< T, NDIM > copy(const Function< T, NDIM > &f, const std::shared_ptr< WorldDCPmapInterface< Key< NDIM > > > &pmap, bool fence=true)
Create a new copy of the function with different distribution and optional fence. ...
Definition: mra.h:1835
const mpreal min(const mpreal &x, const mpreal &y)
Definition: mpreal.h:2675
FLOAT a(int j, FLOAT z)
Definition: y1.cc:86
void copy_to_replicated_patch(int64_t ilow, int64_t ihigh, int64_t jlow, int64_t jhigh, Tensor< T > &s) const 
Copy from distributed matrix into replicated patch (inclusive index range; collective call) ...
Definition: distributed_matrix.h:562
int64_t ihi
Definition: distributed_matrix.h:87
DistributedMatrix< T > operator+(const DistributedMatrix< T > &A) const 
Out of place addition — dimensions and distribution must be identical. 
Definition: distributed_matrix.h:614
int64_t local_ilow() const 
Returns the first column index on this processor (0 if no data present) 
Definition: distributed_matrix.h:267
void sum(T *buf, size_t nelem)
Inplace global sum while still processing AM & tasks. 
Definition: worldgop.h:767
void set(int64_t i, int64_t j, const T x)
Sets element (i,j) to v if (i,j) is local, otherwise throws MadnessException. 
Definition: distributed_matrix.h:630
A parallel world with full functionality wrapping an MPI communicator. 
Definition: worldfwd.h:416
int64_t coldim() const 
Returns the column dimension of the matrix ... i.e., n for A(n,m) 
Definition: distributed_matrix.h:186
void fill(T value)
Fills the matrix with a scalar. 
Definition: distributed_matrix.h:475
bool is_row_distributed() const 
Returns true if the matrix is row distributed (i.e., column dimension not distributed) ...
Definition: distributed_matrix.h:358
int ProcessID
Used to clearly identify process number/rank. 
Definition: worldtypes.h:37
void copy_from_replicated(const Tensor< T > &s)
Copy from the replicated (m,n) matrix into the distributed matrix. 
Definition: distributed_matrix.h:519
int64_t idim
Definition: distributed_matrix.h:89
int64_t local_coldim() const 
Returns the no. of column elements stored on this processor. 
Definition: distributed_matrix.h:236
void local_colrange(int64_t &ilow, int64_t &ihigh) const 
Returns the inclusive range of column indices on this processor. 
Definition: distributed_matrix.h:249
const double m
Definition: gfit.cc:199
void extract_columns(int64_t jlow, int64_t jhigh, DistributedMatrix< T > &U) const 
Definition: distributed_matrix.h:575
const DistributedMatrixDistribution & distribution() const 
Returns the distribution (aka *this) 
Definition: distributed_matrix.h:362
ProcessID rank
Definition: distributed_matrix.h:78
DistributedMatrix< T > concatenate_rows(const DistributedMatrix< T > &a, const DistributedMatrix< T > &b)
Generates a distributed matrix with rows of a and b concatenated. 
Definition: apps/ii/systolic.h:251
int64_t local_jlow() const 
Returns the first row index on this processor (0 if no data present) 
Definition: distributed_matrix.h:278
int64_t local_ihigh() const 
Returns the last column index on this processor (-1 if no data present) 
Definition: distributed_matrix.h:272
void clear()
Frees memory and resets state to same as default constructor. 
Definition: distributed_matrix.h:453
DistributedMatrix< T > & operator*=(const T s)
Inplace scale by a constant. 
Definition: distributed_matrix.h:624
bool has_same_dimension_and_distribution(const DistributedMatrix< R > &A)
Definition: distributed_matrix.h:595
int64_t rowdim() const 
Returns the row dimension of the matrix ... i.e., m for A(n,m) 
Definition: distributed_matrix.h:194
int64_t jhi
Definition: distributed_matrix.h:88
DistributedMatrix< T > concatenate_columns(const DistributedMatrix< T > &a, const DistributedMatrix< T > &b)
Generates a row-distributed matrix with rows of a and b contatenated. 
Definition: apps/ii/systolic.h:288
DistributedMatrix< T > row_distributed_matrix(World &world, int64_t n, int64_t m, int64_t rowtile=0)
Generates an (n,m) matrix distributed by rows (column dimension is not distributed) ...
Definition: apps/ii/systolic.h:216
int64_t ilo
Definition: distributed_matrix.h:87
void get_rowrange(int p, int64_t &jlow, int64_t &jhigh) const 
Returns the inclusive range of row indices on processor p. 
Definition: distributed_matrix.h:335
DistributedMatrix< T > & operator+=(const DistributedMatrix< T > &A)
Inplace addition — dimensions and distribution must be identical. 
Definition: distributed_matrix.h:603
bool is_column_distributed() const 
Returns true if the matrix is column distributed (i.e., row dimension not distributed) ...
Definition: apps/ii/systolic.h:161
int64_t coltile() const 
Returns the column tile size. 
Definition: apps/ii/systolic.h:86
A slice defines a sub-range or patch of a dimension. 
Definition: slice.h:103
int64_t tilem
Definition: distributed_matrix.h:82
DistributedMatrix(const DistributedMatrixDistribution &d)
Constructs a distributed matrix with given distribution info. 
Definition: distributed_matrix.h:426
DistributedMatrixDistribution()
Default constructor makes an invalid distribution. 
Definition: distributed_matrix.h:134
int64_t Prowdim
Definition: distributed_matrix.h:84
int64_t m
Definition: distributed_matrix.h:80
Holds machinery to set up Functions/FuncImpls using various Factories and Interfaces. 
Definition: chem/atomutil.cc:45
ProcessID owner(int64_t i, int64_t j) const 
Returns the number of the process that owns element (i,j) 
Definition: distributed_matrix.h:366
void clear()
Resets state to same as default constructor. 
Definition: distributed_matrix.h:156
const double c
Definition: gfit.cc:200
FLOAT b(int j, FLOAT z)
Definition: y1.cc:79
int64_t jlo
Definition: distributed_matrix.h:88
int64_t process_coldim() const 
Returns the no. of processors in the column dimension. 
Definition: distributed_matrix.h:218
DistributedMatrix< T > interleave_rows(const DistributedMatrix< T > &a, const DistributedMatrix< T > &b)
Generates a distributed matrix with rows of a and b interleaved. 
Definition: apps/ii/systolic.h:233
void fill_identity()
Definition: distributed_matrix.h:480
DistributedMatrix()
Default constructor makes an empty matrix that cannot be used except as a target for assignemnt...
Definition: distributed_matrix.h:419
int64_t Pcol
Definition: distributed_matrix.h:85
int64_t rowtile() const 
Returns the row tile size. 
Definition: apps/ii/systolic.h:89
Manages data associated with a row/column/block distributed array. 
Definition: apps/ii/systolic.h:51