Skip to content

Commit

Permalink
return detected photon info in mmclabcl,print progress bar
Browse files Browse the repository at this point in the history
  • Loading branch information
fangq committed Jul 25, 2019
1 parent ced4ca6 commit eb109e0
Show file tree
Hide file tree
Showing 7 changed files with 100 additions and 69 deletions.
4 changes: 3 additions & 1 deletion mmclab/example/demo_wide_det.m
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
addpath('../../matlab/');
addpath('..');

clear cfg

[node,face,c0]=latticegrid([0 60],[0 60],[0 5 10]);
c0(:,4)=[2;3]; % maximum element size for bottom (label 1) and top (label 2) layers
[node,elem]=surf2mesh(node,face,[],[],1,[],c0);
Expand Down Expand Up @@ -36,7 +38,7 @@
cfg.debuglevel = 'TP';
cfg.issaveexit = 2;

[flux,detp,~,~]=mmclab(cfg);
[flux,detp]=mmclab(cfg);

figure;
imagesc(sum(detp.data,3)');
Expand Down
61 changes: 30 additions & 31 deletions src/mmc_cl_host.c
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ void mmc_run_cl(mcconfig *cfg,tetmesh *mesh, raytracer *tracer){
cl_uint devid=0;
cl_mem gnode,gelem,gtype,gfacenb,gsrcelem,gnormal,gproperty,gparam,gdetpos; /*read-only buffers*/
cl_mem *gweight,*gdetphoton,*gseed,*genergy,*greporter; /*read-write buffers*/
cl_mem *gprogress,*gdetected, *gsrcpattern; /*read-write buffers*/
cl_mem *gprogress=NULL,*gdetected, *gsrcpattern; /*read-write buffers*/

cl_uint meshlen=((cfg->method==rtBLBadouelGrid) ? cfg->crop0.z : mesh->ne)<<cfg->nbuffer; // use 4 copies to reduce racing

Expand All @@ -85,13 +85,14 @@ void mmc_run_cl(mcconfig *cfg,tetmesh *mesh, raytracer *tracer){
cl_uint *Pseed;
float *Pdet;
char opt[MAX_PATH_LENGTH]={'\0'};
cl_uint detreclen=(2+((cfg->ismomentum)>0))*mesh->prop+(cfg->issaveexit>0)*6+2;
cl_uint detreclen=(2+((cfg->ismomentum)>0))*mesh->prop+(cfg->issaveexit>0)*6+1;
cl_uint hostdetreclen=detreclen+1;
GPUInfo *gpu=NULL;

MCXParam param={{{cfg->srcpos.x,cfg->srcpos.y,cfg->srcpos.z}}, {{cfg->srcdir.x,cfg->srcdir.y,cfg->srcdir.z}},
cfg->tstart, cfg->tend, (uint)cfg->isreflect,(uint)cfg->issavedet,(uint)cfg->issaveexit,
(uint)cfg->ismomentum, (uint)cfg->isatomic, (uint)cfg->isspecular, 1.f/cfg->tstep, cfg->minenergy,
cfg->maxdetphoton, (mesh->prop+1), cfg->detnum, (uint)cfg->voidtime, (uint)cfg->srctype,
cfg->maxdetphoton, mesh->prop, cfg->detnum, (uint)cfg->voidtime, (uint)cfg->srctype,
{{cfg->srcparam1.x,cfg->srcparam1.y,cfg->srcparam1.z,cfg->srcparam1.w}},
{{cfg->srcparam2.x,cfg->srcparam2.y,cfg->srcparam2.z,cfg->srcparam2.w}},
0,cfg->maxgate,(uint)cfg->debuglevel, detreclen, cfg->outputtype, mesh->elemlen,
Expand All @@ -101,7 +102,7 @@ void mmc_run_cl(mcconfig *cfg,tetmesh *mesh, raytracer *tracer){
mesh->srcelemlen, {{cfg->bary0.x,cfg->bary0.y,cfg->bary0.z,cfg->bary0.w}},
cfg->e0, cfg->isextdet, meshlen, cfg->nbuffer, ((1 << cfg->nbuffer)-1)};

MCXReporter reporter={0};
MCXReporter reporter={0.f};
platform=mcx_list_gpu(cfg,&workdev,devices,&gpu);

if(workdev>MAX_DEVICE)
Expand Down Expand Up @@ -162,7 +163,7 @@ void mmc_run_cl(mcconfig *cfg,tetmesh *mesh, raytracer *tracer){
if(gpu[i].autothread%gpu[i].autoblock)
gpu[i].autothread=(gpu[i].autothread/gpu[i].autoblock)*gpu[i].autoblock;
if(gpu[i].maxgate==0 && meshlen>0){
int needmem=meshlen+gpu[i].autothread*sizeof(float4)*4+sizeof(float)*cfg->maxdetphoton*detreclen+10*1024*1024; /*keep 10M for other things*/
int needmem=meshlen+gpu[i].autothread*sizeof(float4)*4+sizeof(float)*cfg->maxdetphoton*hostdetreclen+10*1024*1024; /*keep 10M for other things*/
gpu[i].maxgate=(gpu[i].globalmem-needmem)/meshlen;
gpu[i].maxgate=MIN(((cfg->tend-cfg->tstart)/cfg->tstep+0.5),gpu[i].maxgate);
}
Expand All @@ -181,7 +182,7 @@ void mmc_run_cl(mcconfig *cfg,tetmesh *mesh, raytracer *tracer){
}

field=(cl_float *)calloc(sizeof(cl_float)*meshlen,cfg->maxgate);
Pdet=(float*)calloc(cfg->maxdetphoton*sizeof(float),detreclen);
Pdet=(float*)calloc(cfg->maxdetphoton*sizeof(float),hostdetreclen);

fieldlen=meshlen*cfg->maxgate;

Expand All @@ -199,17 +200,20 @@ void mmc_run_cl(mcconfig *cfg,tetmesh *mesh, raytracer *tracer){
else
gsrcelem=NULL;
OCL_ASSERT(((gnormal=clCreateBuffer(mcxcontext,RO_MEM, sizeof(float4)*(mesh->ne)*4,tracer->n,&status),status)));
if(cfg->detpos)
if(cfg->detpos && cfg->detnum)
OCL_ASSERT(((gdetpos=clCreateBuffer(mcxcontext,RO_MEM, cfg->detnum*sizeof(float4),cfg->detpos,&status),status)));
else
gdetpos=NULL;

OCL_ASSERT(((gproperty=clCreateBuffer(mcxcontext,RO_MEM, (mesh->prop+1)*sizeof(medium),mesh->med,&status),status)));
OCL_ASSERT(((gproperty=clCreateBuffer(mcxcontext,RO_MEM, (mesh->prop+1+cfg->isextdet)*sizeof(medium),mesh->med,&status),status)));
OCL_ASSERT(((gparam=clCreateBuffer(mcxcontext,RO_MEM, sizeof(MCXParam),&param,&status),status)));
OCL_ASSERT(((gprogress[0]=clCreateBuffer(mcxcontext,RW_PTR, sizeof(cl_uint),&progress,&status),status)));
progress = (cl_uint *)clEnqueueMapBuffer(mcxqueue[0], gprogress[0], CL_TRUE, CL_MAP_WRITE, 0, sizeof(cl_uint), 0, NULL, NULL, NULL);
cl_mem (*clCreateBufferNV)(cl_context,cl_mem_flags, cl_mem_flags_NV, size_t, void*, cl_int*) = (cl_mem (*)(cl_context,cl_mem_flags, cl_mem_flags_NV, size_t, void*, cl_int*)) clGetExtensionFunctionAddressForPlatform(platform, "clCreateBufferNV");
if (clCreateBufferNV == NULL)
OCL_ASSERT(((gprogress[0]=clCreateBuffer(mcxcontext,RW_PTR, sizeof(cl_uint),NULL,&status),status)));
else
OCL_ASSERT(((gprogress[0]=clCreateBufferNV(mcxcontext,CL_MEM_READ_WRITE, NV_PIN, sizeof(cl_uint),NULL,&status),status)));
progress = (cl_uint *)clEnqueueMapBuffer(mcxqueue[0], gprogress[0], CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(cl_uint), 0, NULL, NULL, NULL);
*progress=0;
clEnqueueUnmapMemObject(mcxqueue[0], gprogress[0], progress, 0, NULL, NULL);

for(i=0;i<workdev;i++){
Pseed=(cl_uint*)malloc(sizeof(cl_uint)*gpu[i].autothread*RAND_SEED_WORD_LEN);
Expand All @@ -218,7 +222,7 @@ void mmc_run_cl(mcconfig *cfg,tetmesh *mesh, raytracer *tracer){
Pseed[j]=rand();
OCL_ASSERT(((gseed[i]=clCreateBuffer(mcxcontext,RW_MEM, sizeof(cl_uint)*gpu[i].autothread*RAND_SEED_WORD_LEN,Pseed,&status),status)));
OCL_ASSERT(((gweight[i]=clCreateBuffer(mcxcontext,RW_MEM, sizeof(float)*fieldlen,field,&status),status)));
OCL_ASSERT(((gdetphoton[i]=clCreateBuffer(mcxcontext,RW_MEM, sizeof(float)*cfg->maxdetphoton*detreclen,Pdet,&status),status)));
OCL_ASSERT(((gdetphoton[i]=clCreateBuffer(mcxcontext,RW_MEM, sizeof(float)*cfg->maxdetphoton*hostdetreclen,Pdet,&status),status)));
OCL_ASSERT(((genergy[i]=clCreateBuffer(mcxcontext,RW_MEM, sizeof(float)*(gpu[i].autothread<<1),energy,&status),status)));
OCL_ASSERT(((gdetected[i]=clCreateBuffer(mcxcontext,RW_MEM, sizeof(cl_uint),&detected,&status),status)));
OCL_ASSERT(((greporter[i]=clCreateBuffer(mcxcontext,RW_MEM, sizeof(MCXReporter),&reporter,&status),status)));
Expand Down Expand Up @@ -307,7 +311,7 @@ void mmc_run_cl(mcconfig *cfg,tetmesh *mesh, raytracer *tracer){
OCL_ASSERT((clSetKernelArg(mcxkernel[i], 0, sizeof(cl_uint),(void*)&threadphoton)));
OCL_ASSERT((clSetKernelArg(mcxkernel[i], 1, sizeof(cl_uint),(void*)&oddphotons)));
OCL_ASSERT((clSetKernelArg(mcxkernel[i], 2, sizeof(cl_mem), (void*)&gparam)));
OCL_ASSERT((clSetKernelArg(mcxkernel[i], 3, cfg->issavedet? sizeof(cl_float)*cfg->nblocksize*detreclen : sizeof(int), NULL)));
OCL_ASSERT((clSetKernelArg(mcxkernel[i], 3, cfg->issavedet? sizeof(cl_float)*((int)gpu[i].autoblock)*detreclen : sizeof(int), NULL)));
OCL_ASSERT((clSetKernelArg(mcxkernel[i], 4, sizeof(cl_mem), (void*)&gnode)));
OCL_ASSERT((clSetKernelArg(mcxkernel[i], 5, sizeof(cl_mem), (void*)&gelem)));
OCL_ASSERT((clSetKernelArg(mcxkernel[i], 6, sizeof(cl_mem), (void*)(gweight+i))));
Expand All @@ -330,7 +334,7 @@ void mmc_run_cl(mcconfig *cfg,tetmesh *mesh, raytracer *tracer){
if(cfg->exportfield==NULL)
cfg->exportfield=mesh->weight;
if(cfg->exportdetected==NULL)
cfg->exportdetected=(float*)malloc(((mesh->prop+1)+1)*cfg->maxdetphoton*sizeof(float));
cfg->exportdetected=(float*)malloc(hostdetreclen*cfg->maxdetphoton*sizeof(float));

cfg->energytot=0.f;
cfg->energyesc=0.f;
Expand All @@ -354,8 +358,8 @@ void mmc_run_cl(mcconfig *cfg,tetmesh *mesh, raytracer *tracer){
param.tend=twindow1;

for(devid=0;devid<workdev;devid++){
//OCL_ASSERT((clEnqueueWriteBuffer(mcxqueue[devid],gparam,CL_TRUE,0,sizeof(MCXParam),&param, 0, NULL, NULL)));
//OCL_ASSERT((clSetKernelArg(mcxkernel[devid],2, sizeof(cl_mem), (void*)&gparam)));
OCL_ASSERT((clEnqueueWriteBuffer(mcxqueue[devid],gparam,CL_TRUE,0,sizeof(MCXParam),&param, 0, NULL, NULL)));
OCL_ASSERT((clSetKernelArg(mcxkernel[devid],2, sizeof(cl_mem), (void*)&gparam)));
// launch mcxkernel
#ifndef USE_OS_TIMER
OCL_ASSERT((clEnqueueNDRangeKernel(mcxqueue[devid],mcxkernel[devid],1,NULL,&gpu[devid].autothread,&gpu[devid].autoblock, 0, NULL, &kernelevent)));
Expand All @@ -364,31 +368,26 @@ void mmc_run_cl(mcconfig *cfg,tetmesh *mesh, raytracer *tracer){
#endif
OCL_ASSERT((clFlush(mcxqueue[devid])));
}
/* if((param.debuglevel & MCX_DEBUG_PROGRESS)){
if((cfg->debuglevel & MCX_DEBUG_PROGRESS)){
int p0 = 0, ndone=-1;
int threadphoton=(int)(cfg->nphoton*cfg->workload[0]/(fullload*gpu[0].autothread*cfg->respin));

mcx_progressbar(-0.f,cfg);

progress = (cl_uint *)clEnqueueMapBuffer(mcxqueue[0], gprogress[0], CL_FALSE, CL_MAP_READ, 0, sizeof(cl_uint), 0, NULL, NULL, NULL);
do{
ndone = *progress;

MMC_FPRINTF(cfg->flog,"progress=%d\n",ndone); // debug progress bar, will remove
if (ndone > p0){
mcx_progressbar(ndone/(threadphoton*1.45f),cfg);
mcx_progressbar((float)ndone/gpu[0].autothread*cfg->nphoton,cfg);
p0 = ndone;
}
sleep_ms(100);
}while (p0 < (param.threadphoton*1.45f));
mcx_progressbar(1.0f,cfg);
}while (p0 < gpu[0].autothread);
mcx_progressbar(cfg->nphoton,cfg);
MMC_FPRINTF(cfg->flog,"\n");

clEnqueueUnmapMemObject(mcxqueue[0], gprogress[0], progress, 0, NULL, NULL);
}
*/
clEnqueueUnmapMemObject(mcxqueue[0], gprogress[0], progress, 0, NULL, NULL);

//clWaitForEvents(workdev,waittoread);
for(devid=0;devid<workdev;devid++)
OCL_ASSERT((clFinish(mcxqueue[devid])));
Expand All @@ -408,7 +407,7 @@ void mmc_run_cl(mcconfig *cfg,tetmesh *mesh, raytracer *tracer){
if(cfg->issavedet){
OCL_ASSERT((clEnqueueReadBuffer(mcxqueue[devid],gdetected[devid],CL_FALSE,0,sizeof(uint),
&detected, 0, NULL, NULL)));
OCL_ASSERT((clEnqueueReadBuffer(mcxqueue[devid],gdetphoton[devid],CL_TRUE,0,sizeof(float)*cfg->maxdetphoton*detreclen,
OCL_ASSERT((clEnqueueReadBuffer(mcxqueue[devid],gdetphoton[devid],CL_TRUE,0,sizeof(float)*cfg->maxdetphoton*hostdetreclen,
Pdet, 0, NULL, NULL)));
if(detected>cfg->maxdetphoton){
MMC_FPRINTF(cfg->flog,"WARNING: the detected photon (%d) \
Expand All @@ -420,8 +419,8 @@ is more than what your have specified (%d), please use the -H option to specify
cfg->his.detected+=detected;
detected=MIN(detected,cfg->maxdetphoton);
if(cfg->exportdetected){
cfg->exportdetected=(float*)realloc(cfg->exportdetected,(cfg->detectedcount+detected)*detreclen*sizeof(float));
memcpy(cfg->exportdetected+cfg->detectedcount*(detreclen),Pdet,detected*(detreclen)*sizeof(float));
cfg->exportdetected=(float*)realloc(cfg->exportdetected,(cfg->detectedcount+detected)*hostdetreclen*sizeof(float));
memcpy(cfg->exportdetected+cfg->detectedcount*(hostdetreclen),Pdet,detected*(hostdetreclen)*sizeof(float));
cfg->detectedcount+=detected;
}
}
Expand Down Expand Up @@ -510,7 +509,7 @@ is more than what your have specified (%d), please use the -H option to specify
}

// total energy here equals total simulated photons+unfinished photons for all threads
MMC_FPRINTF(cfg->flog,"simulated %ld photons (%ld) with %d devices (ray-tet %d)\nMCX simulation speed: %.2f photon/ms\n",
MMC_FPRINTF(cfg->flog,"simulated %ld photons (%ld) with %d devices (ray-tet %.0f)\nMCX simulation speed: %.2f photon/ms\n",
cfg->nphoton,cfg->nphoton,workdev, reporter.raytet,(double)cfg->nphoton/toc);
MMC_FPRINTF(cfg->flog,"total simulated energy: %.2f\tabsorbed: %5.5f%%\n(loss due to initial specular reflection is excluded in the total)\n",
cfg->energytot,(cfg->energytot-cfg->energyesc)/cfg->energytot*100.f);
Expand Down
11 changes: 8 additions & 3 deletions src/mmc_cl_host.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,16 @@
extern "C" {
#endif

#define MIN(a,b) ((a)<(b)?(a):(b))
#ifndef CL_MEM_LOCATION_HOST_NV
#define CL_MEM_LOCATION_HOST_NV (1 << 0)
typedef cl_bitfield cl_mem_flags_NV;
#endif

#define RO_MEM (CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR)
#define WO_MEM (CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR)
#define RW_MEM (CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR)
#define RW_PTR (CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR)
#define RW_PTR (CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR)
#define NV_PIN CL_MEM_LOCATION_HOST_NV

#define OCL_ASSERT(x) ocl_assess((x),__FILE__,__LINE__)

Expand Down Expand Up @@ -84,7 +89,7 @@ typedef struct GPU_mcconfig{
} MCXParam __attribute__ ((aligned (32)));

typedef struct GPU_reporter{
cl_uint raytet;
float raytet;
} MCXReporter __attribute__ ((aligned (32)));

void mmc_run_cl(mcconfig *cfg, tetmesh *mesh, raytracer *tracer);
Expand Down
6 changes: 3 additions & 3 deletions src/mmc_cl_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,9 @@ extern "C" {

#define ABS(a) ((a)<0?-(a):(a))

#define MCX_DEBUG_RNG 1 /**< MCX debug flags */
#define MCX_DEBUG_MOVE 2
#define MCX_DEBUG_PROGRESS 4
#define MCX_DEBUG_RNG 2 /**< MCX debug flags */
#define MCX_DEBUG_MOVE 1
#define MCX_DEBUG_PROGRESS 2048

#define MIN(a,b) ((a)<(b)?(a):(b))

Expand Down
Loading

0 comments on commit eb109e0

Please sign in to comment.