Suppose I am running MPI_Bcast on 64 processes across 4 nodes, with 16 processes per node. Is the default communicator MPI_COMM_WORLD of kind MPIR_COMM_KIND__INTRACOMM or MPIR_COMM_KIND__INTERCOMM? I ask because I want to know which algorithm will be picked.
The relevant source code from MPICH 3.4.2 is shown below.
/*
 * Select and invoke a broadcast implementation.
 *
 * The outer test looks at comm_ptr->comm_kind:
 *   - MPIR_COMM_KIND__INTRACOMM: the routine is chosen by
 *     MPIR_CVAR_BCAST_INTRA_ALGORITHM;
 *   - any other kind: the routine is chosen by
 *     MPIR_CVAR_BCAST_INTER_ALGORITHM.
 *
 * Every routine receives the same six arguments (buffer, count, datatype,
 * root, comm_ptr, errflag) and its return value is stored in mpi_errno.
 * Note that the "nb" and "auto" selections call the same MPIR_Bcast_allcomm_*
 * routines in both branches.
 */
if (comm_ptr->comm_kind == MPIR_COMM_KIND__INTRACOMM) {
/* intracommunicator */
switch (MPIR_CVAR_BCAST_INTRA_ALGORITHM) {
case MPIR_CVAR_BCAST_INTRA_ALGORITHM_binomial:
mpi_errno =
MPIR_Bcast_intra_binomial(buffer, count, datatype, root, comm_ptr, errflag);
break;
case MPIR_CVAR_BCAST_INTRA_ALGORITHM_scatter_recursive_doubling_allgather:
mpi_errno =
MPIR_Bcast_intra_scatter_recursive_doubling_allgather(buffer, count, datatype,
root, comm_ptr, errflag);
break;
case MPIR_CVAR_BCAST_INTRA_ALGORITHM_scatter_ring_allgather:
mpi_errno =
MPIR_Bcast_intra_scatter_ring_allgather(buffer, count, datatype, root, comm_ptr,
errflag);
break;
case MPIR_CVAR_BCAST_INTRA_ALGORITHM_nb:
/* same routine as the intercommunicator "nb" case below */
mpi_errno = MPIR_Bcast_allcomm_nb(buffer, count, datatype, root, comm_ptr, errflag);
break;
case MPIR_CVAR_BCAST_INTRA_ALGORITHM_smp:
mpi_errno = MPIR_Bcast_intra_smp(buffer, count, datatype, root, comm_ptr, errflag);
break;
case MPIR_CVAR_BCAST_INTRA_ALGORITHM_auto:
/* same routine as the intercommunicator "auto" case below */
mpi_errno =
MPIR_Bcast_allcomm_auto(buffer, count, datatype, root, comm_ptr, errflag);
break;
default:
/* selector matched none of the cases above; asserts on a constant 0 */
MPIR_Assert(0);
}
} else {
/* intercommunicator */
switch (MPIR_CVAR_BCAST_INTER_ALGORITHM) {
case MPIR_CVAR_BCAST_INTER_ALGORITHM_remote_send_local_bcast:
mpi_errno =
MPIR_Bcast_inter_remote_send_local_bcast(buffer, count, datatype, root,
comm_ptr, errflag);
break;
case MPIR_CVAR_BCAST_INTER_ALGORITHM_nb:
/* same routine as the intracommunicator "nb" case above */
mpi_errno = MPIR_Bcast_allcomm_nb(buffer, count, datatype, root, comm_ptr, errflag);
break;
case MPIR_CVAR_BCAST_INTER_ALGORITHM_auto:
/* same routine as the intracommunicator "auto" case above */
mpi_errno =
MPIR_Bcast_allcomm_auto(buffer, count, datatype, root, comm_ptr, errflag);
break;
default:
/* selector matched none of the cases above; asserts on a constant 0 */
MPIR_Assert(0);
}
}
If the default MPI_COMM_WORLD is an intracommunicator, why does changing MPIR_CVAR_BCAST_INTER_ALGORITHM affect the program's performance? This problem has been bothering me for days. I hope someone can answer this question, thank you!