#include "mpi.h"

#include <limits.h>
#include <stdio.h>
#include <stdlib.h>

/*****************************************************************************
 * Function: cube_dim
 * Return: int
 *         the smallest dim such that (1 << dim) >= num_procs, or -1 when
 *         num_procs is less than 1.
 * Inputs:
 *         num_procs: number of processors
 * Descrp: decides, based on num_procs, the dimension of the cube that
 *         contains num_procs.
 * Author: LAM, but modified by Ahmad Faraj
 ****************************************************************************/
int cube_dim(int num_procs)
{
    int dim = 0;
    unsigned size = 1;

    if (num_procs < 1) {
        return -1;
    }

    /* num_procs is positive here, so the unsigned comparison is exact. */
    while (size < (unsigned) num_procs) {
        ++dim;
        size <<= 1;
    }
    return dim;
}

/*****************************************************************************
 * Function: hi_bit
 * Return: int
 *         index of the highest bit set in rank among its low `dim` bits,
 *         or -1 when rank < 1, dim < 1, or none of those bits is set.
 * Inputs:
 *         rank: rank id
 *         dim:  dimension rank is in
 * Descrp: based on the dimension of rank, it decides the highest bit set
 *         in rank.
 * Author: LAM, but modified by Ahmad Faraj
 ****************************************************************************/
int hi_bit(int rank, int dim)
{
    const int word_bits = (int) (sizeof(unsigned) * CHAR_BIT);
    unsigned mask;

    if ((rank < 1) || (dim < 1)) {
        return -1;
    }

    /* Shifting by the word width or more is undefined; a positive int has
     * no bits beyond the word anyway, so clamping does not change the
     * answer. */
    if (dim > word_bits) {
        dim = word_bits;
    }

    --dim;
    mask = 1u << dim;   /* unsigned shift: bit 31 must not hit the sign bit */
    while (dim >= 0) {
        if ((unsigned) rank & mask) {
            break;
        }
        --dim;
        mask >>= 1;
    }
    return dim;
}

/*****************************************************************************
 * Function: bcast_lam_binomial_tree
 * Return: int
 *         0 on completion. If the request array cannot be allocated the
 *         function prints a message, calls MPI_Finalize() and exits the
 *         process with status 1.
 * Inputs:
 *         buff:      send input buffer (receive buffer on non-root ranks)
 *         count:     number of elements to send
 *         data_type: data type of elements being sent
 *         root:      source of data
 *         comm:      communicator
 * Descrp: broadcasts using a binomial tree. Ranks are rotated so that root
 *         becomes virtual rank 0. A non-root rank receives from the virtual
 *         rank obtained by clearing its highest set bit, then forwards to
 *         every virtual rank obtained by setting one bit above that highest
 *         bit, as long as the result is below the communicator size.
 * Author: LAM, but modified by Ahmad Faraj
 ****************************************************************************/
int bcast_lam_binomial_tree(void *buff, int count, MPI_Datatype data_type,
                            int root, MPI_Comm comm)
{
    MPI_Request *reqs;
    MPI_Request *req_ptr;
    int rank, num_procs;
    int vrank, peer, dim, high_bit, bit, num_reqs;
    unsigned mask;
    const int tag = 1;
    const int success = 0;
    const int failure = 1;

    MPI_Comm_rank(comm, &rank);
    MPI_Comm_size(comm, &num_procs);

    /* Rotate the rank space so the root is virtual rank 0. */
    vrank = (rank + num_procs - root) % num_procs;

    dim = cube_dim(num_procs);
    high_bit = hi_bit(vrank, dim);   /* -1 for the root (vrank == 0) */
    --dim;                           /* dim is now the highest usable bit index */

    /* Every non-root rank first receives the data from its parent. */
    if (vrank > 0) {
        peer = ((vrank & ~(1 << high_bit)) + root) % num_procs;
        MPIC_Recv(buff, count, data_type, peer, tag, comm, MPI_STATUS_IGNORE);
    }

    /* Send data to the children. */
    reqs = malloc((size_t) num_procs * sizeof *reqs);
    if (!reqs) {
        /* NOTE(review): the message text names a different routine; it is
         * kept unchanged here. MPI_Finalize() on a single failing rank may
         * not terminate the job cleanly -- confirm whether MPI_Abort() is
         * the intended behaviour. */
        printf("allgather-gather-bcast:107: cannot allocate memory\n");
        MPI_Finalize();
        exit(failure);
    }

    req_ptr = reqs;
    num_reqs = 0;
    for (bit = high_bit + 1, mask = 1u << bit; bit <= dim; ++bit, mask <<= 1) {
        peer = (int) ((unsigned) vrank | mask);
        if (peer < num_procs) {
            num_reqs++;
            peer = (peer + root) % num_procs;   /* back to a real rank */
            MPIC_Isend(buff, count, data_type, peer, tag, comm, req_ptr++);
        }
    }

    /* Wait on all requests. */
    if (num_reqs) {
        MPI_Waitall(num_reqs, reqs, MPI_STATUSES_IGNORE);
    }

    free(reqs);

    return success;
}