Skip to content

Commit

Permalink
[opencl] optimizing thread number on Apple silicon, gain 6x speedup
Browse files: browse the repository at this point in the history
  • Loading branch information
fangq committed Jun 9, 2024
1 parent e5d41cc commit efc7496
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 3 deletions.
14 changes: 12 additions & 2 deletions src/mmc_cl_utils.c
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
#include "mmc_cl_utils.h"
#include "mmc_cl_host.h"

const char* VendorList[] = {"Unknown", "NVIDIA", "AMD", "Intel", "IntelGPU"};
const char* VendorList[] = {"Unknown", "NVIDIA", "AMD", "Intel", "IntelGPU", "AppleCPU", "AppleGPU"};

char* print_cl_errstring(cl_int err) {
switch (err) {
Expand Down Expand Up @@ -325,7 +325,17 @@ cl_platform_id mcx_list_cl_gpu(mcconfig* cfg, unsigned int* activedev, cl_device
cuinfo.vendor = dvIntel;
}

cuinfo.autothread = cuinfo.autoblock * cuinfo.core;
if (strstr(cuinfo.name, "Apple M")) {
cuinfo.vendor = dvAppleGPU;
cuinfo.autoblock = 64;
cuinfo.autothread = cuinfo.core * (16 * 48); // each Apple GPU core has 16 EU
} else if (strstr(cuinfo.name, "Apple")) {
cuinfo.vendor = dvAppleCPU;
cuinfo.autoblock = 1;
cuinfo.autothread = 2048;
} else {
cuinfo.autothread = cuinfo.autoblock * cuinfo.core;
}

if (cfg->isgpuinfo) {
MMC_FPRINTF(cfg->flog, "============ %s device ID %d [%d of %d]: %s ============\n", devname[j], cuid, k + 1, devnum, cuinfo.name);
Expand Down
2 changes: 1 addition & 1 deletion src/mmc_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ enum TSrcType {stPencil, stIsotropic, stCone, stGaussian, stPlanar,
enum TOutputType {otFlux, otFluence, otEnergy, otJacobian, otWL, otWP};
enum TOutputFormat {ofASCII, ofBin, ofNifti, ofAnalyze, ofMC2, ofTX3, ofJNifti, ofBJNifti};
enum TOutputDomain {odMesh, odGrid};
enum TDeviceVendor {dvUnknown, dvNVIDIA, dvAMD, dvIntel, dvIntelGPU};
enum TDeviceVendor {dvUnknown, dvNVIDIA, dvAMD, dvIntel, dvIntelGPU, dvAppleCPU, dvAppleGPU};
enum TMCXParent {mpStandalone, mpMATLAB};
enum TBoundary {bcNoReflect, bcReflect, bcAbsorbExterior, bcMirror /*, bcCylic*/};
enum TRayHitType {htNone, htInOut, htOutIn, htNoHitIn, htNoHitOut};
Expand Down

0 comments on commit efc7496

Please sign in to comment.