Added NCCL error checking to tests.
Also cleaned up the Makefile so that the tests and the library are not rebuilt unnecessarily.

Change-Id: Ia0c596cc2213628de2f066be97615c09bb1bb262
Reviewed-on: http://git-master/r/999627
Reviewed-by: Przemek Tredak <[email protected]>
Tested-by: Przemek Tredak <[email protected]>
Authored by nluehr; committed by ptrendx on Jan 29, 2016.
Parent: fe1a956. Commit: 2758353.
Showing 8 changed files with 80 additions and 79 deletions.
Makefile: 14 changes (7 additions, 7 deletions)
@@ -78,14 +78,14 @@ TESTBINS := $(patsubst %, $(TSTDIR)/%, $(TESTS))
MPITESTBINS:= $(patsubst %, $(MPITSTDIR)/%, $(MPITESTS))
DEPFILES := $(patsubst %.o, %.d, $(LIBOBJ)) $(patsubst %, %.d, $(TESTBINS)) $(patsubst %, %.d, $(MPITESTBINS))

-lib : $(INCTARGETS) $(LIBTARGET)
+lib : $(INCTARGETS) $(LIBDIR)/$(LIBTARGET)

-include $(DEPFILES)

-$(LIBTARGET) : $(LIBOBJ)
+$(LIBDIR)/$(LIBTARGET) : $(LIBOBJ)
@printf "Linking %-25s\n" $@
@mkdir -p $(LIBDIR)
-@$(GPP) $(CPPFLAGS) $(CXXFLAGS) -shared -Wl,-soname,$(LIBSONAME) -o $(LIBDIR)/$@ $(LDFLAGS) $(LIBOBJ)
+@$(GPP) $(CPPFLAGS) $(CXXFLAGS) -shared -Wl,-soname,$(LIBSONAME) -o $@ $(LDFLAGS) $(LIBOBJ)
@ln -sf $(LIBSONAME) $(LIBDIR)/$(LIBNAME)
@ln -sf $(LIBTARGET) $(LIBDIR)/$(LIBSONAME)

@@ -109,7 +109,7 @@ clean :

test : lib $(TESTBINS)

-$(TSTDIR)/% : test/single/%.cu lib
+$(TSTDIR)/% : test/single/%.cu $(LIBDIR)/$(LIBTARGET)
@printf "Building %-25s > %-24s\n" $< $@
@mkdir -p $(TSTDIR)
@$(NVCC) $(TSTINC) $(CPPFLAGS) $(NVCUFLAGS) --compiler-options "$(CXXFLAGS)" -o $@ $< -Lbuild/lib $(LIBLINK) $(LDFLAGS) -lcuda -lcurand -lnvToolsExt
@@ -121,11 +121,11 @@ $(TSTDIR)/% : test/single/%.cu lib

mpitest : lib $(MPITESTBINS)

-$(MPITSTDIR)/% : test/mpi/%.cu lib
+$(MPITSTDIR)/% : test/mpi/%.cu $(LIBDIR)/$(LIBTARGET)
@printf "Building %-25s > %-24s\n" $< $@
@mkdir -p $(MPITSTDIR)
-@$(NVCC) $(MPIFLAGS) $(TSTINC) $(CPPFLAGS) $(NVCUFLAGS) --compiler-options "$(CXXFLAGS)" -o $@ $< -Lbuild/lib $(LIBLINK) $(LDFLAGS)
-@$(NVCC) $(MPIFLAGS) -M $(TSTINC) $(CPPFLAGS) $(NVCUFLAGS) --compiler-options "$(CXXFLAGS)" $< -Lbuild/lib $(LIBLINK) $(LDFLAGS) > $(@:%=%.d.tmp)
+@$(NVCC) $(MPIFLAGS) $(TSTINC) $(CPPFLAGS) $(NVCUFLAGS) --compiler-options "$(CXXFLAGS)" -o $@ $< -Lbuild/lib $(LIBLINK) $(LDFLAGS) -lcurand
+@$(NVCC) $(MPIFLAGS) -M $(TSTINC) $(CPPFLAGS) $(NVCUFLAGS) --compiler-options "$(CXXFLAGS)" $< -Lbuild/lib $(LIBLINK) $(LDFLAGS) -lcurand > $(@:%=%.d.tmp)
@sed "0,/^.*:/s//$(subst /,\/,$@):/" $(@:%=%.d.tmp) > $(@:%=%.d)
@sed -e 's/.*://' -e 's/\\$$//' < $(@:%=%.d.tmp) | fmt -1 | \
sed -e 's/^ *//' -e 's/$$/:/' >> $(@:%=%.d)
test/include/test_utilities.h: 25 changes (17 additions, 8 deletions)
@@ -32,14 +32,23 @@

#include <curand.h>

-#define CUDACHECK(cmd) do { \
-  cudaError_t e = cmd; \
-  if( e != cudaSuccess ) { \
-    printf("Cuda failure %s:%d '%s'\n", \
-      __FILE__,__LINE__,cudaGetErrorString(e)); \
-    exit(EXIT_FAILURE); \
-  } \
-} while(false)
+#define CUDACHECK(cmd) do { \
+  cudaError_t e = cmd; \
+  if( e != cudaSuccess ) { \
+    printf("Cuda failure %s:%d '%s'\n", \
+      __FILE__,__LINE__,cudaGetErrorString(e)); \
+    exit(EXIT_FAILURE); \
+  } \
+} while(0)
+
+#define NCCLCHECK(cmd) do { \
+  ncclResult_t r = cmd; \
+  if (r!= ncclSuccess) { \
+    printf("NCCL failure %s:%d '%s'\n", \
+      __FILE__,__LINE__,ncclGetErrorString(r)); \
+    exit(EXIT_FAILURE); \
+  } \
+} while(0)

template<typename T>
void Randomize(T* const dest, const int N, const int randomSeed);
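For readers following along: both helpers use the do { ... } while(0) idiom so they behave like a single statement and can safely be followed by a semicolon inside if/else branches, and NCCLCHECK mirrors CUDACHECK but translates ncclResult_t codes through ncclGetErrorString. A minimal hedged sketch of how a test uses them (device count and list are illustrative, not taken from the real tests):

    // Sketch only: wrap every CUDA and NCCL call so the first failure
    // aborts with file, line, and a readable error string.
    #include <stdio.h>
    #include <stdlib.h>
    #include <cuda_runtime.h>
    #include "nccl.h"
    #include "test_utilities.h"   // provides CUDACHECK / NCCLCHECK

    int main() {
      const int nDev = 2;              // illustrative device count
      int devs[2] = {0, 1};            // illustrative device list
      ncclComm_t comms[2];

      NCCLCHECK(ncclCommInitAll(comms, nDev, devs));
      for (int i = 0; i < nDev; ++i) {
        CUDACHECK(cudaSetDevice(devs[i]));
        CUDACHECK(cudaDeviceSynchronize());
      }
      for (int i = 0; i < nDev; ++i)
        ncclCommDestroy(comms[i]);     // cleanup
      return 0;
    }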
test/mpi/mpi_test.cu: 18 changes (5 additions, 13 deletions)
@@ -32,23 +32,15 @@

#include "nccl.h"
#include "mpi.h"

-#define CUDACHECK(cmd) do { \
-  cudaError_t e = cmd; \
-  if( e != cudaSuccess ) { \
-    printf("Cuda failure %s:%d '%s'\n", \
-      __FILE__,__LINE__,cudaGetErrorString(e)); \
-    exit(EXIT_FAILURE); \
-  } \
-} while(false)
+#include "test_utilities.h"

#define SIZE 128
#define NITERS 1

int main(int argc, char *argv[]) {
ncclUniqueId commId;
int size, rank;
-int ret;
+ncclResult_t ret;

MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &size);
@@ -66,11 +58,11 @@ int main(int argc, char *argv[]) {

// NCCL Communicator creation
ncclComm_t comm;
-ncclGetUniqueId(&commId);
+NCCLCHECK(ncclGetUniqueId(&commId));
MPI_Bcast(&commId, NCCL_UNIQUE_ID_BYTES, MPI_CHAR, 0, MPI_COMM_WORLD);
ret = ncclCommInitRank(&comm, size, commId, rank);
if (ret != ncclSuccess) {
printf("NCCL Init failed : %d\n", ret);
printf("NCCL Init failed (%d) '%s'\n", ret, ncclGetErrorString(ret));
exit(1);
}

@@ -93,7 +85,7 @@ int main(int argc, char *argv[]) {
// Run allreduce
int errors = 0;
for (int i=0; i<NITERS; i++) {
-ncclAllReduce((const void*)dptr, (void*)(dptr+SIZE), SIZE, ncclInt, ncclSum, comm, stream);
+NCCLCHECK(ncclAllReduce((const void*)dptr, (void*)(dptr+SIZE), SIZE, ncclInt, ncclSum, comm, stream));
}

// Check results
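The multi-process path follows the usual NCCL-over-MPI recipe exercised above: one rank creates the unique id, MPI broadcasts it, and each rank joins the clique with ncclCommInitRank. A condensed, hedged sketch of that setup (the device-selection policy and buffer handling are simplified relative to mpi_test.cu):

    // Sketch of rank-based initialization as exercised by mpi_test.cu.
    // Assumes one GPU per MPI rank; the device choice here is illustrative.
    #include <mpi.h>
    #include <cuda_runtime.h>
    #include "nccl.h"
    #include "test_utilities.h"

    int main(int argc, char* argv[]) {
      int rank, size;
      MPI_Init(&argc, &argv);
      MPI_Comm_rank(MPI_COMM_WORLD, &rank);
      MPI_Comm_size(MPI_COMM_WORLD, &size);

      ncclUniqueId commId;
      if (rank == 0)
        NCCLCHECK(ncclGetUniqueId(&commId));   // rank 0 creates the id
      MPI_Bcast(&commId, NCCL_UNIQUE_ID_BYTES, MPI_CHAR, 0, MPI_COMM_WORLD);

      CUDACHECK(cudaSetDevice(rank));          // illustrative; the real test maps local ranks to devices
      ncclComm_t comm;
      NCCLCHECK(ncclCommInitRank(&comm, size, commId, rank));

      // ... collectives wrapped in NCCLCHECK would go here ...

      ncclCommDestroy(comm);
      MPI_Finalize();
      return 0;
    }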
test/single/all_gather_test.cu: 20 changes (10 additions, 10 deletions)
@@ -1,5 +1,5 @@
/*************************************************************************
-* Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved.
+* Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -41,7 +41,7 @@ void RunTest(T** sendbuff, T** recvbuff, const int N, const ncclDataType_t type,
ncclComm_t* const comms, const std::vector<int>& dList) {
// initialize data
int nDev = 0;
-ncclCommCount(comms[0], &nDev);
+NCCLCHECK(ncclCommCount(comms[0], &nDev));
cudaStream_t* s = (cudaStream_t*)malloc(sizeof(cudaStream_t)*nDev);
T* buffer = (T*)malloc(nDev * N * sizeof(T));
T* result = (T*)malloc(nDev * N * sizeof(T));
@@ -61,8 +61,8 @@ void RunTest(T** sendbuff, T** recvbuff, const int N, const ncclDataType_t type,
// warm up GPU
for (int i = 0; i < nDev; ++i) {
CUDACHECK(cudaSetDevice(dList[i]));
-ncclAllGather((const void*)sendbuff[i], std::min(32 * 1024, N), type,
-  (void*)recvbuff[i], comms[i], s[i]);
+NCCLCHECK(ncclAllGather((const void*)sendbuff[i], std::min(32 * 1024, N), type,
+  (void*)recvbuff[i], comms[i], s[i]));
}

for (int i = 0; i < nDev; ++i) {
@@ -79,8 +79,8 @@ void RunTest(T** sendbuff, T** recvbuff, const int N, const ncclDataType_t type,

for (int i = 0; i < nDev; ++i) {
CUDACHECK(cudaSetDevice(dList[i]));
-ncclAllGather((const void*)sendbuff[i], n, type, (void*)recvbuff[i], comms[i],
-  s[i]);
+NCCLCHECK(ncclAllGather((const void*)sendbuff[i], n, type, (void*)recvbuff[i], comms[i],
+  s[i]));
}

for (int i = 0; i < nDev; ++i) {
@@ -121,7 +121,7 @@ template<typename T>
void RunTests(const int N, const ncclDataType_t type, ncclComm_t* const comms,
const std::vector<int>& dList) {
int nDev = 0;
-ncclCommCount(comms[0], &nDev);
+NCCLCHECK(ncclCommCount(comms[0], &nDev));
T** sendbuff = (T**)malloc(nDev * sizeof(T*));
T** recvbuff = (T**)malloc(nDev * sizeof(T*));

@@ -199,15 +199,15 @@ int main(int argc, char* argv[]) {
}

ncclComm_t* comms = (ncclComm_t*)malloc(sizeof(ncclComm_t)*nDev);
-ncclCommInitAll(comms, nDev, dList.data());
+NCCLCHECK(ncclCommInitAll(comms, nDev, dList.data()));

printf("# Using devices\n");
for (int g=0; g<nDev; ++g) {
int cudaDev;
int rank;
cudaDeviceProp prop;
-ncclCommCuDevice(comms[g], &cudaDev);
-ncclCommUserRank(comms[g], &rank);
+NCCLCHECK(ncclCommCuDevice(comms[g], &cudaDev));
+NCCLCHECK(ncclCommUserRank(comms[g], &rank));
CUDACHECK(cudaGetDeviceProperties(&prop, cudaDev));
printf("# Rank %2d uses device %2d [0x%02x] %s\n", rank, cudaDev,
prop.pciBusID, prop.name);
test/single/all_reduce_test.cu: 20 changes (10 additions, 10 deletions)
@@ -48,7 +48,7 @@ void RunTest(T** sendbuff, T** recvbuff, const int N, const ncclDataType_t type,
memset(result, 0, N * sizeof(T));

int nDev = 0;
-ncclCommCount(comms[0], &nDev);
+NCCLCHECK(ncclCommCount(comms[0], &nDev));
cudaStream_t* s = (cudaStream_t*)malloc(sizeof(cudaStream_t)*nDev);

for (int i = 0; i < nDev; ++i) {
@@ -66,7 +66,7 @@ void RunTest(T** sendbuff, T** recvbuff, const int N, const ncclDataType_t type,
// warm up GPU
for (int i = 0; i < nDev; ++i) {
CUDACHECK(cudaSetDevice(dList[i]));
-ncclAllReduce((const void*)sendbuff[i], (void*)recvbuff[i], std::min(N, 1024 * 1024), type, op, comms[i], s[i]);
+NCCLCHECK(ncclAllReduce((const void*)sendbuff[i], (void*)recvbuff[i], std::min(N, 1024 * 1024), type, op, comms[i], s[i]));
}

for (int i = 0; i < nDev; ++i) {
@@ -87,8 +87,8 @@ void RunTest(T** sendbuff, T** recvbuff, const int N, const ncclDataType_t type,
//for (int i=0; i<100; i++) {
for (int i = 0; i < nDev; ++i) {
CUDACHECK(cudaSetDevice(dList[i]));
-ncclAllReduce((const void*)sendbuff[i], (void*)recvbuff[i], n, type, op,
-  comms[i], s[i]);
+NCCLCHECK(ncclAllReduce((const void*)sendbuff[i], (void*)recvbuff[i], n, type, op,
+  comms[i], s[i]));
}
//}

@@ -130,8 +130,8 @@ void RunTest(T** sendbuff, T** recvbuff, const int N, const ncclDataType_t type,
//for (int i=0; i<100; i++) {
for (int i = 0; i < nDev; ++i) {
CUDACHECK(cudaSetDevice(dList[i]));
-ncclAllReduce((const void*)sendbuff[i], (void*)sendbuff[i], n, type, op,
-  comms[i], s[i]);
+NCCLCHECK(ncclAllReduce((const void*)sendbuff[i], (void*)sendbuff[i], n, type, op,
+  comms[i], s[i]));
}
//}

@@ -176,7 +176,7 @@ template<typename T>
void RunTests(const int N, const ncclDataType_t type, ncclComm_t* comms,
const std::vector<int>& dList) {
int nDev = 0;
-ncclCommCount(comms[0], &nDev);
+NCCLCHECK(ncclCommCount(comms[0], &nDev));
T** sendbuff = (T**)malloc(nDev * sizeof(T*));
T** recvbuff = (T**)malloc(nDev * sizeof(T*));

@@ -256,16 +256,16 @@ int main(int argc, char* argv[]) {
}

ncclComm_t* comms = (ncclComm_t*)malloc(sizeof(ncclComm_t)*nDev);
-ncclCommInitAll(comms, nDev, dList.data());
+NCCLCHECK(ncclCommInitAll(comms, nDev, dList.data()));

if (!csv) {
printf("# Using devices\n");
for (int g = 0; g < nDev; ++g) {
int cudaDev;
int rank;
cudaDeviceProp prop;
-ncclCommCuDevice(comms[g], &cudaDev);
-ncclCommUserRank(comms[g], &rank);
+NCCLCHECK(ncclCommCuDevice(comms[g], &cudaDev));
+NCCLCHECK(ncclCommUserRank(comms[g], &rank));
CUDACHECK(cudaGetDeviceProperties(&prop, cudaDev));
printf("# Rank %2d uses device %2d [0x%02x] %s\n", rank, cudaDev,
prop.pciBusID, prop.name);
test/single/broadcast_test.cu: 16 changes (8 additions, 8 deletions)
@@ -1,5 +1,5 @@
/*************************************************************************
-* Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved.
+* Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -41,7 +41,7 @@ void RunTest(T** buff, const int N, const ncclDataType_t type, const int root,
ncclComm_t* const comms, const std::vector<int>& dList) {
// initialize data
int nDev = 0;
-ncclCommCount(comms[0], &nDev);
+NCCLCHECK(ncclCommCount(comms[0], &nDev));
cudaStream_t* s = (cudaStream_t*)malloc(sizeof(cudaStream_t)*nDev);
T* buffer = (T*)malloc(N * sizeof(T));
T* result = (T*)malloc(N * sizeof(T));
@@ -65,7 +65,7 @@ void RunTest(T** buff, const int N, const ncclDataType_t type, const int root,
// warm up GPU
for (int i = 0; i < nDev; ++i) {
CUDACHECK(cudaSetDevice(dList[i]));
-ncclBcast((void*)buff[i], std::min(32 * 1024, N), type, root, comms[i], s[i]);
+NCCLCHECK(ncclBcast((void*)buff[i], std::min(32 * 1024, N), type, root, comms[i], s[i]));
}

for (int i = 0; i < nDev; ++i) {
@@ -83,7 +83,7 @@ void RunTest(T** buff, const int N, const ncclDataType_t type, const int root,

for (int i = 0; i < nDev; ++i) {
CUDACHECK(cudaSetDevice(dList[i]));
-ncclBcast((void*)buff[i], n, type, root, comms[i], s[i]);
+NCCLCHECK(ncclBcast((void*)buff[i], n, type, root, comms[i], s[i]));
}

for (int i = 0; i < nDev; ++i) {
@@ -123,7 +123,7 @@ template<typename T>
void RunTests(const int N, const ncclDataType_t type, ncclComm_t* const comms,
const std::vector<int>& dList) {
int nDev = 0;
-ncclCommCount(comms[0], &nDev);
+NCCLCHECK(ncclCommCount(comms[0], &nDev));
T** buff = (T**)malloc(nDev * sizeof(T*));

for (int i = 0; i < nDev; ++i) {
@@ -199,15 +199,15 @@ int main(int argc, char* argv[]) {
}

ncclComm_t* comms = (ncclComm_t*)malloc(sizeof(ncclComm_t)*nDev);;
-ncclCommInitAll(comms, nDev, dList.data());
+NCCLCHECK(ncclCommInitAll(comms, nDev, dList.data()));

printf("# Using devices\n");
for (int g = 0; g < nDev; ++g) {
int cudaDev;
int rank;
cudaDeviceProp prop;
-ncclCommCuDevice(comms[g], &cudaDev);
-ncclCommUserRank(comms[g], &rank);
+NCCLCHECK(ncclCommCuDevice(comms[g], &cudaDev));
+NCCLCHECK(ncclCommUserRank(comms[g], &rank));
CUDACHECK(cudaGetDeviceProperties(&prop, cudaDev));
printf("# Rank %2d uses device %2d [0x%02x] %s\n", rank, cudaDev,
prop.pciBusID, prop.name);
test/single/reduce_scatter_test.cu: 24 changes (12 additions, 12 deletions)
@@ -1,5 +1,5 @@
/*************************************************************************
-* Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved.
+* Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -41,7 +41,7 @@ void RunTest(T** sendbuff, T** recvbuff, const int N, const ncclDataType_t type,
const ncclRedOp_t op, ncclComm_t* const comms, const std::vector<int>& dList) {
// initialize data
int nDev = 0;
-ncclCommCount(comms[0], &nDev);
+NCCLCHECK(ncclCommCount(comms[0], &nDev));
cudaStream_t* s = (cudaStream_t*)malloc(sizeof(cudaStream_t)*nDev);

T* buffer = (T*)malloc(N * nDev * sizeof(T));
@@ -66,8 +66,8 @@ void RunTest(T** sendbuff, T** recvbuff, const int N, const ncclDataType_t type,
// warm up GPU
for (int i = 0; i < nDev; ++i) {
CUDACHECK(cudaSetDevice(dList[i]));
-ncclReduceScatter((const void*)sendbuff[i], (void*)recvbuff[i],
-  std::min(N, 1024 * 1024), type, op, comms[i], s[i]);
+NCCLCHECK(ncclReduceScatter((const void*)sendbuff[i], (void*)recvbuff[i],
+  std::min(N, 1024 * 1024), type, op, comms[i], s[i]));
}

for (int i = 0; i < nDev; ++i) {
@@ -86,8 +86,8 @@ void RunTest(T** sendbuff, T** recvbuff, const int N, const ncclDataType_t type,

for (int i = 0; i < nDev; ++i) {
CUDACHECK(cudaSetDevice(dList[i]));
-ncclReduceScatter((const void*)sendbuff[i], (void*)recvbuff[i], n, type,
-  op, comms[i], s[i]);
+NCCLCHECK(ncclReduceScatter((const void*)sendbuff[i], (void*)recvbuff[i], n, type,
+  op, comms[i], s[i]));
}

for (int i = 0; i < nDev; ++i) {
@@ -122,8 +122,8 @@ void RunTest(T** sendbuff, T** recvbuff, const int N, const ncclDataType_t type,

for (int i = 0; i < nDev; ++i) {
CUDACHECK(cudaSetDevice(dList[i]));
-ncclReduceScatter((const void*)sendbuff[i], (void*)sendbuff[i], n, type,
-  op, comms[i], s[i]);
+NCCLCHECK(ncclReduceScatter((const void*)sendbuff[i], (void*)sendbuff[i], n, type,
+  op, comms[i], s[i]));
}

for (int i = 0; i < nDev; ++i) {
@@ -163,7 +163,7 @@ template<typename T>
void RunTests(const int N, const ncclDataType_t type, ncclComm_t* const comms,
const std::vector<int>& dList) {
int nDev = 0;
-ncclCommCount(comms[0], &nDev);
+NCCLCHECK(ncclCommCount(comms[0], &nDev));
T** sendbuff = (T**)malloc(nDev * sizeof(T*));
T** recvbuff = (T**)malloc(nDev * sizeof(T*));

@@ -243,15 +243,15 @@ int main(int argc, char* argv[]) {
}

ncclComm_t* comms = (ncclComm_t*)malloc(sizeof(ncclComm_t)*nDev);
-ncclCommInitAll(comms, nDev, dList.data());
+NCCLCHECK(ncclCommInitAll(comms, nDev, dList.data()));

printf("# Using devices\n");
for (int g = 0; g < nDev; ++g) {
int cudaDev;
int rank;
cudaDeviceProp prop;
-ncclCommCuDevice(comms[g], &cudaDev);
-ncclCommUserRank(comms[g], &rank);
+NCCLCHECK(ncclCommCuDevice(comms[g], &cudaDev));
+NCCLCHECK(ncclCommUserRank(comms[g], &rank));
CUDACHECK(cudaGetDeviceProperties(&prop, cudaDev));
printf("# Rank %2d uses device %2d [0x%02x] %s\n", rank, cudaDev,
prop.pciBusID, prop.name);
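Across the four single-process tests the change is uniform: communicator queries, warm-up collectives, and timed collectives are all wrapped in NCCLCHECK, while stream and memory operations stay under CUDACHECK. A condensed, hedged sketch of that shared pattern (element count, data type, device list, and the reduction op are illustrative placeholders):

    // Sketch of the error-checked pattern shared by the single-process tests.
    #include <stdio.h>
    #include <stdlib.h>
    #include <cuda_runtime.h>
    #include "nccl.h"
    #include "test_utilities.h"

    int main() {
      const int N = 1024;               // illustrative element count
      const int nDev = 2;               // illustrative device count
      int devs[2] = {0, 1};
      ncclComm_t comms[2];
      cudaStream_t streams[2];
      float* sendbuff[2];
      float* recvbuff[2];

      NCCLCHECK(ncclCommInitAll(comms, nDev, devs));

      for (int i = 0; i < nDev; ++i) {
        CUDACHECK(cudaSetDevice(devs[i]));
        CUDACHECK(cudaStreamCreate(&streams[i]));
        CUDACHECK(cudaMalloc((void**)&sendbuff[i], N * sizeof(float)));
        CUDACHECK(cudaMalloc((void**)&recvbuff[i], N * sizeof(float)));
      }

      // Every collective is now wrapped in NCCLCHECK, one call per device.
      for (int i = 0; i < nDev; ++i) {
        CUDACHECK(cudaSetDevice(devs[i]));
        NCCLCHECK(ncclAllReduce((const void*)sendbuff[i], (void*)recvbuff[i],
            N, ncclFloat, ncclSum, comms[i], streams[i]));
      }

      for (int i = 0; i < nDev; ++i) {
        CUDACHECK(cudaSetDevice(devs[i]));
        CUDACHECK(cudaStreamSynchronize(streams[i]));
        CUDACHECK(cudaFree(sendbuff[i]));
        CUDACHECK(cudaFree(recvbuff[i]));
        CUDACHECK(cudaStreamDestroy(streams[i]));
        ncclCommDestroy(comms[i]);
      }
      return 0;
    }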