Hi all -
I wasn't entirely sure whether to post this here or on the OpenMPI
list, but I believe the problem comes from the way I'm using
boost.mpi: I appear to be leaking MPI_Comm handles. Since I intend
to spawn hundreds of times during the lifetime of my program, this is
less than ideal ;)
Here's the code:
//common.h
#include <mpi.h>
#include <boost/mpi.hpp>
#include <string>

std::string get_processor_name() {
  char buf[MPI_MAX_PROCESSOR_NAME];
  int blen = 0;
  MPI_Get_processor_name(buf, &blen); // blen receives the name's length
  return std::string(buf, blen);
}
//master.cpp
#include "common.h"
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>

void doWork(size_t numChildren, char *cmd) {
  namespace mpi = boost::mpi;
  MPI_Comm all;
  MPI_Comm_spawn(cmd, MPI_ARGV_NULL, static_cast<int>(numChildren),
                 MPI_INFO_NULL, 0, MPI_COMM_SELF, &all, MPI_ERRCODES_IGNORE);
  mpi::intercommunicator intercomm(all, mpi::comm_duplicate);
  mpi::communicator comm = intercomm.merge(false);
  std::cerr << "spawned by " << get_processor_name() << " (rank "
            << comm.rank() << ")" << std::endl;
  std::string str;
  for(int i = 1; i < comm.size(); ++i) {
    comm.recv(i, 0, str);
    std::cerr << "got string " << str << " from rank " << i << std::endl;
  }
  std::string mymsg = "hi back from the master";
  broadcast(comm, mymsg, 0); // ADL resolves to boost::mpi::broadcast
}
int main(int argc, char **args) {
  std::cerr << "hi, it's me" << std::endl;
  //mpi::environment env;
  int provided;
  MPI_Init_thread(&argc, &args, MPI_THREAD_MULTIPLE, &provided);
  std::ifstream inFile("hostsfile");
  std::string dest;
  size_t totalProcesses = 0;
  // one slave per non-comment line of the hosts file
  while(std::getline(inFile, dest)) {
    totalProcesses += (!dest.empty() && dest[0] != '#');
  }
  char *cmd = strdup("/home/budgeb/tests/test_mpi/spawnslave");
  for(size_t j = 0; j < 2; ++j) {
    doWork(totalProcesses - 1, cmd);
  }
  free(cmd);
  MPI_Finalize();
  return 0;
}
//slave.cpp
#include "common.h"
#include <cmath>
#include <iostream>
#include <boost/lexical_cast.hpp>

void doWork() {
  namespace mpi = boost::mpi;
  MPI_Comm parent;
  MPI_Comm_get_parent(&parent);
  mpi::intercommunicator intercomm(parent, mpi::comm_duplicate);
  mpi::communicator comm = intercomm.merge(true);
  std::cerr << "slave " << get_processor_name() << " of rank "
            << comm.rank() << std::endl;
  // burn some cycles so the slaves stay alive for a noticeable time
  double d = comm.rank();
  for(size_t i = 0; i < 100000000; ++i) {
    d = sin(exp(d));
  }
  comm.send(0, 0, std::string("hello world (") +
            boost::lexical_cast<std::string>(d) + ")");
  std::string masterMsg;
  broadcast(comm, masterMsg, 0);
  std::cerr << "master sent this: " << masterMsg << std::endl;
}
int main(int argc, char **args) {
  int provided;
  MPI_Init_thread(&argc, &args, MPI_THREAD_MULTIPLE, &provided);
  doWork();
  MPI_Finalize();
  return 0;
}
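For reference, I build both binaries with mpic++ and launch only the
master, roughly like this (binary names approximate):

mpic++ master.cpp -lboost_mpi -lboost_serialization -o spawnmaster
mpic++ slave.cpp -lboost_mpi -lboost_serialization -o spawnslave
mpirun -np 1 ./spawnmaster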
When I run this program, my output is fine, but upon shutdown, I get this:
[budgeb-interlagos:04473] WARNING: MPI_Comm still allocated in MPI_Finalize
[budgeb-interlagos:04473] Dumping information for comm_cid 4
[budgeb-interlagos:04473] f2c index:4 cube_dim: 0
[budgeb-interlagos:04473] Local group: size = 1 my_rank = 0
[budgeb-interlagos:04473] Communicator is:
[budgeb-interlagos:04473] inter-comm,
[budgeb-interlagos:04473]
[budgeb-interlagos:04473] Remote group size:3
[budgeb-interlagos:04473] WARNING: MPI_Comm still allocated in MPI_Finalize
[budgeb-interlagos:04473] Dumping information for comm_cid 6
[budgeb-interlagos:04473] f2c index:6 cube_dim: 0
[budgeb-interlagos:04473] Local group: size = 1 my_rank = 0
[budgeb-interlagos:04473] Communicator is:
[budgeb-interlagos:04473] inter-comm,
[budgeb-interlagos:04473]
[budgeb-interlagos:04473] Remote group size:3
Does anyone know why this leak might occur?
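My best guess is that mpi::comm_duplicate tells boost.mpi to duplicate
the underlying communicator and manage only the duplicate, so the raw
`all` handle returned by MPI_Comm_spawn is still mine to release. If
that's right, a one-liner at the end of the master's doWork() should
quiet the warning (an untested sketch):

  MPI_Comm_free(&all); // boost.mpi owns only its duplicate; the spawn handle stays with me

or, alternatively, constructing the wrapper with mpi::comm_take_ownership
instead of mpi::comm_duplicate, so that boost.mpi frees the original
handle itself. Is one of those the intended usage, or should boost.mpi
be cleaning these up on its own?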
Thanks,
Brian