12 #include "core/LongOptions.h" 13 #include "core/Arguments.h" 14 #include "system/signal_handler.h" 15 #include "mpi/mpi_utils.h" 16 #include "mpi/mpi_common.h" 18 #include "controller/AbstractController.h" 21 #include "MPIWorkerHandler.h" 23 #include "MPIMaster.h" 32 R
"(* Help message for 'mpi' master * 35 The MPI master parallelizes instances of the simulator, also called 36 "workers", across all launched MPI processes. This means that every MPI 37 process is responsible for spawning workers. The correspondence between 38 workers and MPI processes is one-to-one; launching N MPI processes results in 39 N workers running in parallel. 41 If no optional arguments are given, the simulator is, by default, assumed to 42 be a standard simulator, which means that it communicates with pakman 43 through its stdin and stdout. 45 If the optional argument --mpi-simulator is given, the simulator is assumed 46 to communicate with pakman through MPI. The MPI simulator must then be 47 written with the header pakman_mpi_worker.h or PakmanMPIWorker.hpp. 49 In order to maximize the number of CPU cycles devoted to the workers, the MPI 50 master is implemented using an event loop. The time spent sleeping at each 51 iteration of the event loop can be adjusted using the optional argument 54 When a worker needs to be shut down, for example when the algorithm has 55 finished, pakman first sends SIGTERM to the worker. If the worker has not 56 exited after a fixed amount of time, it is killed by sending the SIGKILL 57 signal. The amount of time between sending SIGTERM and SIGKILL can be 58 changed using the optional argument --kill-timeout. This is only meaningful 59 for standard simulators because the MPI standard does not support signals for 60 processes that are spawned using MPI functions. 62 Some MPI implementations do not automatically spawn dynamic MPI processes on 63 the same host as the spawning MPI process. The flag --force-host-spawn tries 64 to enforce spawning dynamic MPI processes on the same host by setting the 65 "host" key in MPI_Info to the same host as the spawning MPI process. 
68 -m, --mpi-simulator simulator is spawned using MPI 69 -f, --force-host-spawn force MPI simulator to spawn on same host 70 as manager (requires -m option) 71 -p, --mpi-info=KEY_VAL_STR specify key-value pairs for MPI_Info object 73 'KEY1=VALUE1; KEY2=VALUE2; ...; KEYN=VALUEN' 74 (requires -m option). The characters '=' and 75 ';' can be escaped using a backslash. 76 -t, --main-timeout=TIME sleep for TIME ms in event loop (default 1) 77 -k, --kill-timeout=TIME wait for TIME ms before sending SIGKILL 86 return Manager::mpi_worker;
89 return Manager::forked_worker;
// Register the long options accepted by the MPI master. Each entry is a
// `struct option` initializer: {long name, argument requirement, flag pointer,
// short-option character}. The flag pointer is always nullptr here.

// --main-timeout / -t: takes a required argument.
95 lopts.
add({
"main-timeout", required_argument,
nullptr,
't'});
// --kill-timeout / -k: takes a required argument.
96 lopts.
add({
"kill-timeout", required_argument,
nullptr,
'k'});
// --mpi-simulator / -m: plain switch, takes no argument.
97 lopts.
add({
"mpi-simulator", no_argument,
nullptr,
'm'});
// --force-host-spawn / -f: plain switch, takes no argument.
98 lopts.
add({
"force-host-spawn", no_argument,
nullptr,
'f'});
// --mpi-info / -p: takes a required argument.
99 lopts.
add({
"mpi-info", required_argument,
nullptr,
'p'});
// Tracks whether the simulator is an MPI simulator; off by default.
106 bool mpi_simulator =
false;
// NOTE(review): the condition guarding this assignment is not visible in this
// excerpt -- presumably it is the check for the --mpi-simulator option.
123 mpi_simulator =
true;
// Report (on std::cout) that --force-host-spawn was given without
// --mpi-simulator, then call ::help with EXIT_FAILURE.
// NOTE(review): the guarding condition is not visible here; confirm that
// ::help does not return.
130 std::cout <<
"Error: option --mpi-simulator must be set " 131 "if --force-host-spawn is set\n";
132 ::help(mpi, controller, EXIT_FAILURE);
// Same report for --mpi-info given without --mpi-simulator.
136 std::cout <<
"Error: option --mpi-simulator must be set " 137 "if --mpi-info is set\n";
138 ::help(mpi, controller, EXIT_FAILURE);
// Initialize MPI without forwarding argc/argv (both passed as nullptr).
142 MPI_Init(
nullptr,
nullptr);
// Create the MPI_Info object held in g_info.
147 MPI_Info_create(&g_info);
// Copy every key/value entry of `dict` into g_info.
// NOTE(review): the construction of `dict` is not visible in this excerpt --
// presumably it holds the parsed --mpi-info pairs; confirm.
154 for (
auto it = dict.begin(); it != dict.end(); ++it)
155 MPI_Info_set(g_info, it->first.c_str(), it->second.c_str());
// Store this process's rank within MPI_COMM_WORLD in `rank`.
161 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
164 set_signal_handler();
// Call get_worker with the mpi_simulator flag.
// NOTE(review): the left-hand side of this statement is cut off in this
// excerpt, so what receives the result cannot be confirmed from here.
168 get_worker(mpi_simulator);
// NOTE(review): the remainder of this declaration is cut off in this excerpt;
// it presumably declares p_controller, which is used below.
171 std::shared_ptr<AbstractController>
// Build the Manager as a shared_ptr; its first constructor argument is the
// controller's simulator (remaining arguments are not visible here).
175 auto p_manager = std::make_shared<Manager>(p_controller->getSimulator(),
// Give the master and the controller a handle to each other.
184 p_master->assignController(p_controller);
185 p_controller->assignMaster(p_master);
// Event loop: keep going while either the master or the manager is active.
188 while (p_master->isActive() || p_manager->isActive())
// NOTE(review): the statement guarded by this check is not visible here.
190 if (p_master->isActive())
// Advance the manager only while it is still active.
193 if (p_manager->isActive())
194 p_manager->iterate();
// Second loop: keep iterating the manager until it reports inactive.
202 while (p_manager->isActive())
204 p_manager->iterate();
// Drop this shared_ptr's reference to the controller.
212 p_controller.reset();
// Free the MPI_Info object created earlier.
216 MPI_Info_free(&g_info);
// Send TERMINATE_MANAGER_SIGNAL as a single MPI_INT, tagged MASTER_SIGNAL_TAG,
// to every rank from 1 up to comm_size - 1 in MPI_COMM_WORLD. Rank 0 is not
// sent to -- presumably because it is the sending (master) rank; confirm.
229 int comm_size = get_mpi_comm_world_size();
230 int signal = TERMINATE_MANAGER_SIGNAL;
232 for (
int manager_rank = 1; manager_rank < comm_size; manager_rank++)
233 MPI_Send(&signal, 1, MPI_INT, manager_rank,
234 MASTER_SIGNAL_TAG, MPI_COMM_WORLD);
// Ask MPI whether it has already been finalized; the answer lands in
// is_finalized (how it is used afterwards is not visible in this excerpt).
240 int is_finalized = 0;
241 MPI_Finalized(&is_finalized);
std::map< std::string, std::string > parse_key_value_pairs(const std::string &str)
bool g_program_terminated
std::chrono::milliseconds g_kill_timeout
std::chrono::milliseconds g_main_timeout
static void run(controller_t controller, const Arguments &args)
static void terminateStatic()
bool isOptionalArgumentSet(const std::string &option_name) const
static AbstractController * makeController(controller_t controller, const Arguments &args)
static std::string help()
void add(struct option long_opt)
static void addLongOptions(LongOptions &lopts)
std::string optionalArgument(const std::string &option_name) const