MADNESS
version 0.9
|
Provides collectives that interoperate with the AM and task interfaces. More...
#include <worldgop.h>
Public Member Functions | |
WorldGopInterface (World &world) | |
~WorldGopInterface () | |
bool | set_debug (bool value) |
Set debug flag to new value and return old value. More... | |
void | barrier () |
Synchronizes all processes in communicator ... does NOT fence pending AM or tasks. More... | |
void | fence () |
Synchronizes all processes in communicator AND globally ensures no pending AM or tasks. More... | |
void | broadcast (void *buf, size_t nbyte, ProcessID root, bool dowork=true) |
Broadcasts bytes from process root while still processing AM & tasks. More... | |
template<typename T > | |
void | broadcast (T *buf, size_t nelem, ProcessID root) |
Broadcasts typed contiguous data from process root while still processing AM & tasks. More... | |
template<typename T > | |
void | broadcast (T &t) |
Broadcast of a scalar from node 0 to all other nodes. More... | |
template<typename T > | |
void | broadcast (T &t, ProcessID root) |
Broadcast of a scalar from node root to all other nodes. More... | |
template<typename objT > | |
void | broadcast_serializable (objT &obj, ProcessID root) |
Broadcast a serializable object. More... | |
template<typename T , class opT > | |
void | reduce (T *buf, size_t nelem, opT op) |
Inplace global reduction (like MPI all_reduce) while still processing AM & tasks. More... | |
template<typename T > | |
void | sum (T *buf, size_t nelem) |
Inplace global sum while still processing AM & tasks. More... | |
template<typename T > | |
void | min (T *buf, size_t nelem) |
Inplace global min while still processing AM & tasks. More... | |
template<typename T > | |
void | max (T *buf, size_t nelem) |
Inplace global max while still processing AM & tasks. More... | |
template<typename T > | |
void | absmin (T *buf, size_t nelem) |
Inplace global absmin while still processing AM & tasks. More... | |
template<typename T > | |
void | absmax (T *buf, size_t nelem) |
Inplace global absmax while still processing AM & tasks. More... | |
template<typename T > | |
void | product (T *buf, size_t nelem) |
Inplace global product while still processing AM & tasks. More... | |
template<typename T > | |
void | bit_and (T *buf, size_t nelem) |
template<typename T > | |
void | bit_or (T *buf, size_t nelem) |
template<typename T > | |
void | bit_xor (T *buf, size_t nelem) |
template<typename T > | |
void | logic_and (T *buf, size_t nelem) |
template<typename T > | |
void | logic_or (T *buf, size_t nelem) |
template<typename T > | |
void | sum (T &a) |
Global sum of a scalar while still processing AM & tasks. More... | |
template<typename T > | |
void | max (T &a) |
Global max of a scalar while still processing AM & tasks. More... | |
template<typename T > | |
void | min (T &a) |
Global min of a scalar while still processing AM & tasks. More... | |
template<typename T > | |
std::vector< T > | concat0 (const std::vector< T > &v, size_t bufsz=1024 *1024) |
Concatenate an STL vector of serializable stuff onto node 0. More... | |
template<typename keyT , typename valueT > | |
void | send (const ProcessID dest, const keyT &key, const valueT &value) const |
Send value to dest . More... | |
template<typename keyT , typename opT > | |
void | lazy_sync (const keyT &key, const opT &op) const |
Lazy sync. More... | |
template<typename keyT , typename opT > | |
void | lazy_sync (const keyT &key, const opT &op, const Group &group) const |
Group lazy sync. More... | |
template<typename keyT , typename valueT > | |
void | bcast (const keyT &key, Future< valueT > &value, const ProcessID root) const |
Broadcast. More... | |
template<typename keyT , typename valueT > | |
void | bcast (const keyT &key, Future< valueT > &value, const ProcessID group_root, const Group &group) const |
Group broadcast. More... | |
template<typename keyT , typename valueT , typename opT > | |
Future< typename detail::result_of< opT >::type > | reduce (const keyT &key, const valueT &value, const opT &op, const ProcessID root) |
Distributed reduce. More... | |
template<typename keyT , typename valueT , typename opT > | |
Future< typename detail::result_of< opT >::type > | reduce (const keyT &key, const valueT &value, const opT &op, const ProcessID group_root, const Group &group) |
Distributed group reduce. More... | |
template<typename keyT , typename valueT , typename opT > | |
Future< typename detail::result_of< opT >::type > | all_reduce (const keyT &key, const valueT &value, const opT &op) |
Distributed all reduce. More... | |
template<typename keyT , typename valueT , typename opT > | |
Future< typename detail::result_of< opT >::type > | all_reduce (const keyT &key, const valueT &value, const opT &op, const Group &group) |
Distributed, group all reduce. More... | |
Static Public Member Functions | |
template<typename valueT , typename keyT > | |
static Future< valueT > | recv (const ProcessID source, const keyT &key) |
Receive data from source . More... | |
Friends | |
class | detail::DeferredCleanup |
Provides collectives that interoperate with the AM and task interfaces.
If native AM interoperates with MPI we probably should map these to MPI.
|
inline |
|
inline |
|
inline |
Inplace global absmax while still processing AM & tasks.
|
inline |
Inplace global absmin while still processing AM & tasks.
|
inline |
Distributed all reduce.
The reduce functor must have the following signature:
keyT | The key type |
valueT | The data type to be reduced |
opT | The reduction operation type |
key | The key associated with this reduction |
value | The local value to be reduced |
op | The reduction operation to be applied to local and remote data |
root | The process that will receive the result of the reduction |
Note: the caller must ensure that key does not conflict with other calls to all_reduce. Keys may be reused after the associated operation has finished.
References op(), and mpfr::root().
|
inline |
Distributed, group all reduce.
The reduce functor must have the following signature:
keyT | The key type |
valueT | The data type to be reduced |
opT | The reduction operation type |
key | The key associated with this reduction |
value | The local value to be reduced |
op | The reduction operation to be applied to local and remote data |
group_root | The group process that will receive the result of the reduction |
group | The group that will perform the reduction |
madness::Exception | When group is empty |
madness::Exception | When group is not registered |
madness::Exception | When the world id of group is not equal to that of the world used to construct this object |
madness::Exception | When this process is not in the group |
Note: the caller must ensure that key does not conflict with other calls to reduce. Keys may be reused after the associated operation has finished.
References madness::Group::empty(), madness::Group::get_world(), madness::World::id(), madness::Group::make_tree(), op(), madness::Group::rank(), and madness::Group::size().
|
inline |
Synchronizes all processes in communicator ... does NOT fence pending AM or tasks.
References madness::error(), and sum().
Referenced by madness::plotvtk_data(), and madness::World::World().
|
inline |
Broadcast.
Broadcast data from the root process to all processes. The input/output data is held by value.
[in] | key | The key associated with this broadcast |
[in,out] | value | On the root process, this is used as the input data that will be broadcast to all other processes. On other processes it is used as the output to the broadcast. |
root | The process that owns the data to be broadcast |
madness::Exception | When root is less than 0 or greater than or equal to the world size. |
madness::Exception | When value has been set, except on the root process. |
Note: the caller must ensure that key does not conflict with other calls to bcast. Keys may be reused after the associated operation has finished.
References madness::Future< T >::probe(), and mpfr::root().
|
inline |
Group broadcast.
Broadcast data from the group_root process to all processes in group. The input/output data is held by value.
[in] | key | The key associated with this broadcast |
[in,out] | value | On the group_root process, this is used as the input data that will be broadcast to all other processes in the group. On other processes it is used as the output to the broadcast |
group_root | The process in group that owns the data to be broadcast | |
group | The process group where value will be broadcast |
madness::Exception | When group is empty |
madness::Exception | When group is not registered |
madness::Exception | When the world id of group is not equal to that of the world used to construct this object |
madness::Exception | When this process is not in the group |
madness::Exception | When group_root is less than 0 or greater than or equal to group size |
madness::Exception | When data has been set except on the root process |
Note: the caller must ensure that key does not conflict with other calls to bcast. Keys may be reused after the associated operation has finished.
References madness::Group::empty(), madness::Group::get_world(), madness::World::id(), madness::Future< T >::probe(), madness::Group::rank(), and madness::Group::size().
|
inline |
|
inline |
|
inline |
void madness::WorldGopInterface::broadcast | ( | void * | buf, |
size_t | nbyte, | ||
ProcessID | root, | ||
bool | dowork = true |
||
) |
Broadcasts bytes from process root while still processing AM & tasks.
Optimizations can be added for long messages
References madness::World::await(), SafeMPI::Intracomm::binary_tree_info(), madness::WorldMpiInterface::Irecv(), madness::WorldMpiInterface::Isend(), madness::World::mpi, MPI_BYTE, and SafeMPI::Intracomm::unique_tag().
Referenced by broadcast(), broadcast_serializable(), SCF::diag_fock_matrix(), doit(), madness::archive::BaseParallelArchive< BinaryFstreamInputArchive >::exists(), fence(), madness::SCF::get_fock_transformation(), madness::load_coeffs(), madness::load_quadrature(), SCF::localize_boys(), main(), madness::archive::BaseParallelArchive< BinaryFstreamInputArchive >::open(), propagate(), reduce(), madness::SCF::update_subspace(), and madness::World::World().
|
inline |
Broadcasts typed contiguous data from process root while still processing AM & tasks.
Optimizations can be added for long messages
References broadcast(), and std::tr1::T().
|
inline |
Broadcast of a scalar from node 0 to all other nodes.
References broadcast().
|
inline |
Broadcast of a scalar from node root to all other nodes.
References broadcast().
|
inline |
Broadcast a serializable object.
Current dumb version assumes object fits in 1MB ... you are free to add intelligence.
References broadcast(), mpfr::root(), and madness::archive::BufferOutputArchive::size().
Referenced by madness::archive::BaseParallelArchive< BinaryFstreamInputArchive >::broadcast(), doit(), madness::Solver< T, NDIM >::init(), madness::LoadBalanceDeux< NDIM >::load_balance(), main(), madness::MP2::MP2(), madness::SCF::SCF(), madness::Subspace< T, NDIM >::update_subspace(), and madness::SCF::update_subspace().
|
inline |
Concatenate an STL vector of serializable stuff onto node 0.
References madness::World::await(), MPI_BYTE, and madness::archive::BufferOutputArchive::size().
Referenced by madness::LoadBalanceDeux< NDIM >::load_balance().
void madness::WorldGopInterface::fence | ( | ) |
Synchronizes all processes in communicator AND globally ensures no pending AM or tasks.
Runs a Dijkstra-like termination algorithm on a binary tree by locally ensuring ntask=0 and all AM sent and processed, and then participating in a global sum of nsent and nrecv. Then globally checks that nsent=nrecv and that both are constant over two traversals. We are then sure that all tasks and AM are processed and that there are no AM in flight.
References madness::World::am, madness::World::await(), SafeMPI::Intracomm::binary_tree_info(), broadcast(), madness::detail::DeferredCleanup::do_cleanup(), madness::WorldTaskQueue::fence(), madness::WorldAmInterface::free_managed_buffers(), madness::WorldMpiInterface::Irecv(), madness::WorldMpiInterface::Isend(), madness::World::mpi, MPI_BYTE, PROFILE_MEMBER_FUNC, madness::WorldTaskQueue::size(), sum(), madness::World::taskq, and SafeMPI::Intracomm::unique_tag().
Referenced by madness::add(), madness::apply(), madness::FunctionImpl< Q, NDIM >::apply(), madness::FunctionImpl< Q, NDIM >::apply_1d_realspace_push(), SCF::apply_potential(), madness::GTHPseudopotential< double >::apply_potential(), madness::SCF::apply_potential(), madness::FunctionImpl< Q, NDIM >::apply_source_driven(), madness::FunctionImpl< Q, NDIM >::binaryXX(), madness::Solver< T, NDIM >::build_fock_matrix(), madness::Function< double, 6 >::clear(), madness::CompositeFunctorInterface< T, NDIM, MDIM >::CompositeFunctorInterface(), madness::compress(), madness::SCF::compute_residual(), madness::Solver< T, NDIM >::compute_rho(), madness::Solver< T, NDIM >::compute_rho_slow(), madness::conj(), madness::TDA_DFT::convolution_with_kernel(), madness::copy(), madness::FunctionImpl< Q, NDIM >::copy_coeffs(), madness::SCF::do_step_restriction(), doit(), energy(), madness::BinSorter< T, inserterT >::finish(), madness::FunctionImpl< Q, NDIM >::flo_unary_op_node_inplace(), madness::FunctionImpl< Q, NDIM >::FunctionImpl(), madness::gaxpy(), madness::FunctionImpl< Q, NDIM >::gaxpy(), gaxpy1(), madness::FunctionImpl< Q, NDIM >::gaxpy_ext(), madness::FunctionImpl< Q, NDIM >::gaxpy_inplace(), madness::FunctionImpl< Q, NDIM >::gaxpy_inplace_reconstructed(), madness::FunctionImpl< Q, NDIM >::hartree_product(), initial_loadbal(), madness::inner(), line_plot(), madness::FunctionImpl< Q, NDIM >::load(), madness::archive::ArchiveLoadImpl< ParallelInputArchive, WorldContainer< keyT, valueT > >::load(), madness::LoadBalanceDeux< NDIM >::load_balance(), madness::SCF::load_mos(), madness::Solver< T, NDIM >::load_orbitals(), madness::load_quadrature(), loadbal(), madness::SCF::loadbal(), madness::LoadBalanceDeux< NDIM >::LoadBalanceDeux(), main(), madness::SCF::make_density(), madness::FunctionImpl< Q, NDIM >::make_Vphi(), madness::matrix_inner(), madness::FunctionImpl< Q, NDIM >::merge_trees(), madness::mul(), madness::mul_sparse(), madness::EigSolverOp< T, NDIM >::multi_op_o(), 
madness::EigSolverOp< T, NDIM >::multi_op_r(), madness::Function< double, 6 >::multiop_values(), madness::FunctionImpl< Q, NDIM >::multiop_values(), madness::FunctionImpl< Q, NDIM >::mulXX(), madness::FunctionImpl< Q, NDIM >::mulXXvec(), madness::nonstandard(), madness::norm2(), madness::norm2s(), madness::norm_tree(), madness::normalize(), madness::Projector< double, 3 >::operator()(), madness::vecfunc< T, NDIM >::operator*(), madness::xfunction::operator*(), madness::plot_along(), madness::plot_line(), madness::plotdx(), madness::plotvtk_begin(), madness::plotvtk_data(), madness::plotvtk_end(), preloadbal(), madness::WorldProfile::print(), madness::Solver< T, NDIM >::print_fock_matrix_eigs(), madness::Solver< T, NDIM >::print_potential_matrix_eigs(), madness::print_stats(), madness::SCF::project(), madness::SCF::project_ao_basis(), madness::Solver< T, NDIM >::project_ao_basis(), madness::FunctionImpl< Q, NDIM >::project_out(), propagate(), madness::reconstruct(), madness::FunctionImpl< Q, NDIM >::recursive_apply(), madness::WorldDCPmapInterface< Key< D > >::redistribute(), madness::FunctionImpl< Q, NDIM >::refine(), madness::Solver< T, NDIM >::reproject(), madness::scale(), madness::FunctionImpl< Q, NDIM >::scale_oop(), scaled_plotvtk_begin(), madness::set_thresh(), madness::SCF::solve(), madness::standard(), madness::START_TIMER(), START_TIMER(), madness::Solver< T, NDIM >::START_TIMER(), madness::startup(), madness::FunctionImpl< Q, NDIM >::store(), madness::archive::ArchiveStoreImpl< ParallelOutputArchive, WorldContainer< keyT, valueT > >::store(), madness::sub(), madness::TDA_DFT::TDA_DFT(), test(), madness::transform(), madness::truncate(), SCF::twoint(), madness::SCF::twoint(), madness::FunctionImpl< Q, NDIM >::unary_op_coeff_inplace(), madness::FunctionImpl< Q, NDIM >::unary_op_node_inplace(), madness::FunctionImpl< Q, NDIM >::unary_op_value_inplace(), madness::FunctionImpl< Q, NDIM >::unaryXX(), madness::FunctionImpl< Q, NDIM >::unaryXXvalues(), 
madness::Subspace< T, NDIM >::update_subspace(), madness::SCF::update_subspace(), and madness::FunctionImpl< Q, NDIM >::vtransform().
|
inline |
Lazy sync.
Lazy sync functions are asynchronous barriers with a nullary functor that is called after all processes have called it with the same key. You can think of lazy_sync as an asynchronous barrier. The lazy_sync functor must have the following signature:
keyT | The key type |
opT | The operation type |
key | The sync key |
op | The sync operation to be executed on this process |
Note: the caller must ensure that key does not conflict with other calls to lazy_sync. Keys may be reused after the associated operation has finished.
References madness::TaskAttributes::hipri(), and op().
|
inline |
Group lazy sync.
Lazy sync functions are asynchronous barriers with a nullary functor that is called after all processes in the group have called it with the same key. You can think of lazy_sync as an asynchronous barrier. The op functor must have the following signature:
keyT | The key type |
opT | The operation type |
key | The sync key |
op | The sync operation to be executed on this process |
Note: the caller must ensure that key does not conflict with other calls to lazy_sync. Keys may be reused after the associated operation has finished.
References madness::Group::empty(), madness::Group::get_world(), madness::TaskAttributes::hipri(), madness::World::id(), madness::Group::make_tree(), op(), and madness::Group::size().
|
inline |
|
inline |
|
inline |
Inplace global max while still processing AM & tasks.
Referenced by max(), and madness::print_stats().
|
inline |
Global max of a scalar while still processing AM & tasks.
References max().
|
inline |
Inplace global min while still processing AM & tasks.
Referenced by min(), and madness::print_stats().
|
inline |
Global min of a scalar while still processing AM & tasks.
References min().
|
inline |
Inplace global product while still processing AM & tasks.
|
inline static |
Receive data from source.
valueT | The data type stored in cache |
keyT | The key type |
source | The process that is sending the data to this process |
key | The key associated with the received data |
Note: the caller must ensure that key does not conflict with other calls to recv. Keys may be reused after the associated operation has finished.
|
inline |
Inplace global reduction (like MPI all_reduce) while still processing AM & tasks.
Optimizations can be added for long messages and to reduce the memory footprint
References madness::World::await(), broadcast(), MPI_BYTE, op(), and std::tr1::T().
|
inline |
Distributed reduce.
The reduce functor must have the following signature:
keyT | The key type |
valueT | The data type to be reduced |
opT | The reduction operation type |
key | The key associated with this reduction |
value | The local value to be reduced |
op | The reduction operation to be applied to local and remote data |
root | The process that will receive the result of the reduction |
Note: the caller must ensure that key does not conflict with other calls to reduce. Keys may be reused after the associated operation has finished.
References op(), and mpfr::root().
|
inline |
Distributed group reduce.
The reduce functor must have the following signature:
keyT | The key type |
valueT | The data type to be reduced |
opT | The reduction operation type |
key | The key associated with this reduction |
value | The local value to be reduced |
op | The reduction operation to be applied to local and remote data |
group_root | The group process that will receive the result of the reduction |
group | The group that will perform the reduction |
madness::Exception | When group is empty |
madness::Exception | When group is not registered |
madness::Exception | When the world id of group is not equal to that of the world used to construct this object |
madness::Exception | When this process is not in the group |
madness::Exception | When group_root is less than zero or greater than or equal to group size. |
Note: the caller must ensure that key does not conflict with other calls to reduce. Keys may be reused after the associated operation has finished.
References madness::Group::empty(), madness::Group::get_world(), madness::World::id(), madness::Group::make_tree(), op(), and madness::Group::size().
|
inline |
Send value to dest.
keyT | The key type |
valueT | The value type (this may be a Future type) |
dest | The process where the data will be sent |
key | The key that is associated with the data |
value | The data to be sent to dest |
Note: the caller must ensure that key does not conflict with other calls to send. Keys may be reused after the associated operation has finished.
|
inline |
Set debug flag to new value and return old value.
|
inline |
Inplace global sum while still processing AM & tasks.
Referenced by barrier(), madness::DistributedMatrix< double >::copy_to_replicated(), madness::DistributedMatrix< double >::copy_to_replicated_patch(), CubicInterpolationTable< complexd >::CubicInterpolationTable(), fence(), madness::inner(), line_plot(), madness::matrix_inner(), madness::norm2(), madness::norm2s(), madness::print_stats(), projectL(), sum(), madness::Subspace< T, NDIM >::update_subspace(), and madness::SCF::update_subspace().
|
inline |
Global sum of a scalar while still processing AM & tasks.
References sum().
|
friend |