Skip to content

Commit

Permalink
Merge pull request #2 from simonepassera/main
Browse files Browse the repository at this point in the history
Introduced optimizer threshold for GatherMPI.
Fixed memory leaks on All-to-all generic implementation
  • Loading branch information
nicolotonci authored Nov 2, 2023
2 parents f8a97fb + 7798daa commit 94b232b
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 5 deletions.
4 changes: 4 additions & 0 deletions include/collectives/collectiveImpl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -832,8 +832,12 @@ class AlltoallGeneric : public CollectiveImpl {
errno = ECONNRESET;
return -1;
}

delete [] chunkbuff;
}
}

delete [] allsendbuff;

return selfrecvcount;
} else {
Expand Down
22 changes: 19 additions & 3 deletions include/collectives/mpiImpl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#define MPICOLLIMPL_HPP

#include "collectiveImpl.hpp"
#include "../config.hpp"
#include <mpi.h>
#include <cassert>

Expand Down Expand Up @@ -289,14 +290,29 @@ class GatherMPI : public MPICollective {
}

size_t datacount = recvsize / datasize;

int recvcount = (datacount / nparticipants) * datasize;
int rcount = datacount % nparticipants;

if ((rcount == 0) && (recvcount >= GATHER_THRESHOLD_MSG_SIZE)) {
if ((size_t)recvcount > sendsize) {
MTCL_ERROR("[internal]:\t","sending buffer too small %ld instead of %ld\n", sendsize, recvcount);
errno = EINVAL;
return -1;
}

if (MPI_Gather(sendbuff, recvcount, MPI_BYTE, recvbuff, recvcount, MPI_BYTE, 0, comm) != MPI_SUCCESS) {
errno = ECOMM;
return -1;
}

return recvcount;
}

int *recvcounts = new int[nparticipants]();
int *displs = new int[nparticipants]();

int displ = 0;

int recvcount = (datacount / nparticipants) * datasize;
int rcount = datacount % nparticipants;

for (int i = 0; i < nparticipants; i++) {
recvcounts[i] = recvcount;
Expand Down
4 changes: 2 additions & 2 deletions include/config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ const unsigned UCX_POLL_TIMEOUT = 10;

// -------- COLLECTIVES ------
const int CCONNECTION_RETRY = 10;
const unsigned CCONNECTION_TIMEOUT = 100; // milliseconds

const unsigned CCONNECTION_TIMEOUT = 100; // milliseconds
const int GATHER_THRESHOLD_MSG_SIZE = (1<<18); // bytes

#endif

0 comments on commit 94b232b

Please sign in to comment.