Skip to content

Commit

Permalink
Merge pull request #271 from SChernykh/evo
Browse files (browse the repository at this point in the history)
RandomX fixes and improvements
  • Loading branch information
xmrig authored Aug 15, 2019
2 parents 85339fa + 235a357 commit b23d687
Show file tree
Hide file tree
Showing 9 changed files with 100 additions and 65 deletions.
5 changes: 3 additions & 2 deletions src/amd/GpuContext.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,9 +73,9 @@ struct GpuContext
Nonce(0)
#ifdef XMRIG_ALGO_RANDOMX
, gcnAsm(1)
, datasetHost(0)
, AsmProgram(nullptr)
, rx_variant(xmrig::VARIANT_AUTO)
, rx_dataset(nullptr)
, rx_scratchpads(nullptr)
, rx_hashes(nullptr)
, rx_entropy(nullptr)
Expand Down Expand Up @@ -131,11 +131,12 @@ struct GpuContext

#ifdef XMRIG_ALGO_RANDOMX
int gcnAsm;
int datasetHost;
cl_program AsmProgram;

uint8_t rx_dataset_seedhash[32];
xmrig::Variant rx_variant;
cl_mem rx_dataset;
static cl_mem rx_dataset[32];
cl_mem rx_scratchpads;
cl_mem rx_hashes;
cl_mem rx_entropy;
Expand Down
40 changes: 30 additions & 10 deletions src/amd/OclGPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@

// printf-style message logged when clSetKernelArg fails; the placeholders are,
// in order, the error string, the kernel index and the argument index.
constexpr const char *kSetKernelArgErr = "Error %s when calling clSetKernelArg for kernel %d, argument %d.";

// Out-of-class definition of the static GpuContext::rx_dataset array, with every
// slot zero-initialised. Code in this file indexes it with ctx->deviceIdx and
// treats a null slot as "dataset buffer not created yet".
// NOTE(review): no bounds check of deviceIdx against the 32 slots is visible here — confirm.
cl_mem GpuContext::rx_dataset[32] = {};


inline static const char *err_to_str(cl_int ret)
{
Expand Down Expand Up @@ -217,10 +219,20 @@ size_t InitOpenCLGpu(int index, cl_context opencl_ctx, GpuContext* ctx, const ch
#ifdef XMRIG_ALGO_RANDOMX
if (config->algorithm().algo() == xmrig::RANDOM_X) {
const size_t dataset_size = randomx_dataset_item_count() * RANDOMX_DATASET_ITEM_SIZE;
ctx->rx_dataset = OclLib::createBuffer(opencl_ctx, CL_MEM_READ_ONLY, dataset_size, nullptr, &ret);
if (ret != CL_SUCCESS) {
LOG_ERR("Error %s when calling clCreateBuffer to create RandomX dataset.", err_to_str(ret));
return OCL_ERR_API;

if (!ctx->rx_dataset[ctx->deviceIdx]) {
if (!ctx->datasetHost) {
ctx->rx_dataset[ctx->deviceIdx] = OclLib::createBuffer(opencl_ctx, CL_MEM_READ_ONLY, dataset_size, nullptr, &ret);
}
else {
randomx_dataset* dataset = Workers::getDataset(nullptr, xmrig::VARIANT_AUTO);
ctx->rx_dataset[ctx->deviceIdx] = OclLib::createBuffer(opencl_ctx, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, dataset_size, randomx_get_dataset_memory(dataset), &ret);
}

if (ret != CL_SUCCESS) {
LOG_ERR("Error %s when calling clCreateBuffer to create RandomX dataset.", err_to_str(ret));
return OCL_ERR_API;
}
}

ctx->rx_scratchpads = OclLib::createBuffer(opencl_ctx, CL_MEM_READ_WRITE, (xmrig::rx_select_memory(config->algorithm().variant()) + 64) * g_thd, nullptr, &ret);
Expand Down Expand Up @@ -555,7 +567,7 @@ size_t InitOpenCLGpu(int index, cl_context opencl_ctx, GpuContext* ctx, const ch
return OCL_ERR_API;
}

if ((ret = OclLib::setKernelArg(ctx->rx_kernels[7], 3, sizeof(cl_mem), &ctx->rx_dataset)) != CL_SUCCESS) {
if ((ret = OclLib::setKernelArg(ctx->rx_kernels[7], 3, sizeof(cl_mem), &ctx->rx_dataset[ctx->deviceIdx])) != CL_SUCCESS) {
LOG_ERR(kSetKernelArgErr, err_to_str(ret), 7, 3);
return OCL_ERR_API;
}
Expand Down Expand Up @@ -617,7 +629,7 @@ size_t InitOpenCLGpu(int index, cl_context opencl_ctx, GpuContext* ctx, const ch
// iteration is set in RXRunJob()

// randomx_run
if ((ret = OclLib::setKernelArg(ctx->rx_kernels[10], 0, sizeof(cl_mem), &ctx->rx_dataset)) != CL_SUCCESS) {
if ((ret = OclLib::setKernelArg(ctx->rx_kernels[10], 0, sizeof(cl_mem), &ctx->rx_dataset[ctx->deviceIdx])) != CL_SUCCESS) {
LOG_ERR(kSetKernelArgErr, err_to_str(ret), 10, 0);
return OCL_ERR_API;
}
Expand Down Expand Up @@ -1367,9 +1379,11 @@ size_t RXSetJob(GpuContext *ctx, uint8_t *input, size_t input_len, uint64_t targ
if ((memcmp(ctx->rx_dataset_seedhash, seed_hash, sizeof(ctx->rx_dataset_seedhash)) != 0) || (ctx->rx_variant != variant)) {
memcpy(ctx->rx_dataset_seedhash, seed_hash, sizeof(ctx->rx_dataset_seedhash));
ctx->rx_variant = variant;
if ((ret = OclLib::enqueueWriteBuffer(ctx->CommandQueues, ctx->rx_dataset, CL_TRUE, 0, dataset_size, randomx_get_dataset_memory(dataset), 0, nullptr, nullptr)) != CL_SUCCESS) {
LOG_ERR("Error %s when calling clEnqueueWriteBuffer to fill RandomX dataset.", err_to_str(ret));
return OCL_ERR_API;
if (!ctx->datasetHost) {
if ((ret = OclLib::enqueueWriteBuffer(ctx->CommandQueues, ctx->rx_dataset[ctx->deviceIdx], CL_TRUE, 0, dataset_size, randomx_get_dataset_memory(dataset), 0, nullptr, nullptr)) != CL_SUCCESS) {
LOG_ERR("Error %s when calling clEnqueueWriteBuffer to fill RandomX dataset.", err_to_str(ret));
return OCL_ERR_API;
}
}
}

Expand Down Expand Up @@ -1600,7 +1614,13 @@ void ReleaseOpenCl(GpuContext* ctx)

#ifdef XMRIG_ALGO_RANDOMX
if (ctx->AsmProgram) OclLib::releaseProgram(ctx->AsmProgram);
if (ctx->rx_dataset) OclLib::releaseMemObject(ctx->rx_dataset);

if (ctx->rx_dataset[ctx->deviceIdx]) {
cl_mem ptr = ctx->rx_dataset[ctx->deviceIdx];
ctx->rx_dataset[ctx->deviceIdx] = nullptr;
OclLib::releaseMemObject(ptr);
}

if (ctx->rx_scratchpads) OclLib::releaseMemObject(ctx->rx_scratchpads);
if (ctx->rx_hashes) OclLib::releaseMemObject(ctx->rx_hashes);
if (ctx->rx_entropy) OclLib::releaseMemObject(ctx->rx_entropy);
Expand Down
22 changes: 11 additions & 11 deletions src/amd/opencl/RandomX/randomx_run_gfx803.asm
Original file line number Diff line number Diff line change
Expand Up @@ -358,14 +358,15 @@ main_loop:

v_add_u32 v22, vcc, v6, v36
v_addc_u32 v25, vcc, v20, 0, vcc
v_add_u32 v21, vcc, v22, v1
v_addc_u32 v22, vcc, v25, 0, vcc
flat_load_dwordx2 v[21:22], v[21:22]
v_or_b32 v30, v32, v13
v_and_b32 v31, v33, v19
v_or_b32 v31, v31, v14
v_or_b32 v28, v28, v15
v_and_b32 v29, v29, v19
v_or_b32 v29, v29, v16
v_add_u32 v21, vcc, v22, v1
v_addc_u32 v22, vcc, v25, 0, vcc
ds_write2_b64 v5, v[30:31], v[28:29] offset1:1
s_waitcnt lgkmcnt(0)

Expand Down Expand Up @@ -402,6 +403,13 @@ main_loop:
# call JIT code
s_swappc_b64 s[12:13], s[4:5]

# Write out group F,E registers
# Write low 8 bytes from lane 0 and high 8 bytes from lane 1
ds_write2_b64 v41, v[60:61], v[62:63] offset0:8 offset1:10
ds_write2_b64 v41, v[64:65], v[66:67] offset0:12 offset1:14
ds_write2_b64 v41, v[68:69], v[70:71] offset0:16 offset1:18
ds_write2_b64 v41, v[72:73], v[74:75] offset0:20 offset1:22

# store VM integer registers
v_writelane_b32 v28, s16, 0
v_writelane_b32 v29, s17, 0
Expand All @@ -420,21 +428,13 @@ main_loop:
v_writelane_b32 v28, s30, 7
v_writelane_b32 v29, s31, 7

# Write out group F,E registers
# Write low 8 bytes from lane 0 and high 8 bytes from lane 1
ds_write2_b64 v41, v[60:61], v[62:63] offset0:8 offset1:10
ds_write2_b64 v41, v[64:65], v[66:67] offset0:12 offset1:14
ds_write2_b64 v41, v[68:69], v[70:71] offset0:16 offset1:18
ds_write2_b64 v41, v[72:73], v[74:75] offset0:20 offset1:22

# Restore execution mask
s_mov_b64 exec, s[36:37]

# Write out VM integer registers
ds_write_b64 v17, v[28:29]

flat_load_dwordx2 v[21:22], v[21:22]
s_waitcnt vmcnt(0) & lgkmcnt(0)
s_waitcnt lgkmcnt(0)
v_xor_b32 v21, v28, v21
v_xor_b32 v22, v29, v22
ds_read_b32 v28, v7
Expand Down
30 changes: 15 additions & 15 deletions src/amd/opencl/RandomX/randomx_run_gfx803.h
Original file line number Diff line number Diff line change
Expand Up @@ -134,20 +134,20 @@ static unsigned char randomx_run_gfx803_bin[]={
,0xd8,0x00,0x00,0x00,0x18,0x00,0x00,0xec,0xd8,0x0c,0x00,0x00,0x1a,0x7f,0x00,0x8c,0xbf,0x1b,0x33,0x32,0x2a,0x1a,0x31,0x30,0x2a,0x19,0x15,0x14,0x2a,0x18,0x2f,0x2e
,0x2a,0x56,0x14,0x14,0x26,0x56,0x2e,0x2e,0x26,0x0a,0x03,0x14,0x32,0x17,0x03,0x2e,0x32,0x02,0x15,0x34,0x32,0x1b,0x6a,0x1c,0xd1,0x03,0x01,0xa9,0x01,0x02,0x2f,0x2e
,0x32,0x18,0x6a,0x1c,0xd1,0x03,0x01,0xa9,0x01,0x00,0x00,0x54,0xdc,0x1a,0x00,0x00,0x1c,0x00,0x00,0x54,0xdc,0x17,0x00,0x00,0x1e,0x71,0x0f,0x8c,0xbf,0x1c,0x09,0x40
,0x7e,0x1d,0x09,0x38,0x7e,0x70,0x0f,0x8c,0xbf,0x15,0x3d,0x44,0x2a,0x16,0x3f,0x46,0x2a,0x06,0x49,0x2c,0x32,0x19,0x6a,0x1c,0xd1,0x14,0x01,0xa9,0x01,0x20,0x1b,0x3c
,0x28,0x21,0x27,0x3e,0x26,0x1f,0x1d,0x3e,0x28,0x1c,0x1f,0x38,0x28,0x1d,0x27,0x3a,0x26,0x1d,0x21,0x3a,0x28,0x16,0x03,0x2a,0x32,0x16,0x6a,0x1c,0xd1,0x19,0x01,0xa9
,0x01,0x00,0x01,0x9c,0xd8,0x05,0x1e,0x1c,0x00,0x7f,0x00,0x8c,0xbf,0x83,0x01,0xfe,0xbe,0x08,0x0a,0xee,0xd8,0x29,0x00,0x00,0x3c,0x0c,0x0e,0xee,0xd8,0x29,0x00,0x00
,0x40,0x10,0x12,0xee,0xd8,0x29,0x00,0x00,0x44,0x14,0x16,0xee,0xd8,0x29,0x00,0x00,0x48,0x10,0x00,0x89,0xd2,0x22,0x01,0x01,0x00,0x11,0x00,0x89,0xd2,0x23,0x01,0x01
,0x00,0x12,0x00,0x89,0xd2,0x22,0x03,0x01,0x00,0x13,0x00,0x89,0xd2,0x23,0x03,0x01,0x00,0x14,0x00,0x89,0xd2,0x22,0x05,0x01,0x00,0x15,0x00,0x89,0xd2,0x23,0x05,0x01
,0x00,0x16,0x00,0x89,0xd2,0x22,0x07,0x01,0x00,0x17,0x00,0x89,0xd2,0x23,0x07,0x01,0x00,0x18,0x00,0x89,0xd2,0x22,0x09,0x01,0x00,0x19,0x00,0x89,0xd2,0x23,0x09,0x01
,0x00,0x1a,0x00,0x89,0xd2,0x22,0x0b,0x01,0x00,0x1b,0x00,0x89,0xd2,0x23,0x0b,0x01,0x00,0x1c,0x00,0x89,0xd2,0x22,0x0d,0x01,0x00,0x1d,0x00,0x89,0xd2,0x23,0x0d,0x01
,0x00,0x1e,0x00,0x89,0xd2,0x22,0x0f,0x01,0x00,0x1f,0x00,0x89,0xd2,0x23,0x0f,0x01,0x00,0x7f,0x00,0x8c,0xbf,0x04,0x1e,0x8c,0xbe,0x1c,0x00,0x8a,0xd2,0x10,0x00,0x01
,0x00,0x1d,0x00,0x8a,0xd2,0x11,0x00,0x01,0x00,0x1c,0x00,0x8a,0xd2,0x12,0x02,0x01,0x00,0x1d,0x00,0x8a,0xd2,0x13,0x02,0x01,0x00,0x1c,0x00,0x8a,0xd2,0x14,0x04,0x01
,0x00,0x1d,0x00,0x8a,0xd2,0x15,0x04,0x01,0x00,0x1c,0x00,0x8a,0xd2,0x16,0x06,0x01,0x00,0x1d,0x00,0x8a,0xd2,0x17,0x06,0x01,0x00,0x1c,0x00,0x8a,0xd2,0x18,0x08,0x01
,0x00,0x1d,0x00,0x8a,0xd2,0x19,0x08,0x01,0x00,0x1c,0x00,0x8a,0xd2,0x1a,0x0a,0x01,0x00,0x1d,0x00,0x8a,0xd2,0x1b,0x0a,0x01,0x00,0x1c,0x00,0x8a,0xd2,0x1c,0x0c,0x01
,0x00,0x1d,0x00,0x8a,0xd2,0x1d,0x0c,0x01,0x00,0x1c,0x00,0x8a,0xd2,0x1e,0x0e,0x01,0x00,0x1d,0x00,0x8a,0xd2,0x1f,0x0e,0x01,0x00,0x08,0x0a,0x9c,0xd8,0x29,0x3c,0x3e
,0x00,0x0c,0x0e,0x9c,0xd8,0x29,0x40,0x42,0x00,0x10,0x12,0x9c,0xd8,0x29,0x44,0x46,0x00,0x14,0x16,0x9c,0xd8,0x29,0x48,0x4a,0x00,0x24,0x01,0xfe,0xbe,0x00,0x00,0x9a
,0xd8,0x11,0x1c,0x00,0x00,0x00,0x00,0x54,0xdc,0x15,0x00,0x00,0x15,0x70,0x00,0x8c,0xbf,0x1c,0x2b,0x2a,0x2a,0x1d,0x2d,0x2c,0x2a,0x00,0x00,0x6c,0xd8,0x07,0x00,0x00
,0x7e,0x1d,0x09,0x38,0x7e,0x70,0x0f,0x8c,0xbf,0x15,0x3d,0x44,0x2a,0x16,0x3f,0x46,0x2a,0x06,0x49,0x2c,0x32,0x19,0x6a,0x1c,0xd1,0x14,0x01,0xa9,0x01,0x16,0x03,0x2a
,0x32,0x16,0x6a,0x1c,0xd1,0x19,0x01,0xa9,0x01,0x00,0x00,0x54,0xdc,0x15,0x00,0x00,0x15,0x20,0x1b,0x3c,0x28,0x21,0x27,0x3e,0x26,0x1f,0x1d,0x3e,0x28,0x1c,0x1f,0x38
,0x28,0x1d,0x27,0x3a,0x26,0x1d,0x21,0x3a,0x28,0x00,0x01,0x9c,0xd8,0x05,0x1e,0x1c,0x00,0x7f,0x00,0x8c,0xbf,0x83,0x01,0xfe,0xbe,0x08,0x0a,0xee,0xd8,0x29,0x00,0x00
,0x3c,0x0c,0x0e,0xee,0xd8,0x29,0x00,0x00,0x40,0x10,0x12,0xee,0xd8,0x29,0x00,0x00,0x44,0x14,0x16,0xee,0xd8,0x29,0x00,0x00,0x48,0x10,0x00,0x89,0xd2,0x22,0x01,0x01
,0x00,0x11,0x00,0x89,0xd2,0x23,0x01,0x01,0x00,0x12,0x00,0x89,0xd2,0x22,0x03,0x01,0x00,0x13,0x00,0x89,0xd2,0x23,0x03,0x01,0x00,0x14,0x00,0x89,0xd2,0x22,0x05,0x01
,0x00,0x15,0x00,0x89,0xd2,0x23,0x05,0x01,0x00,0x16,0x00,0x89,0xd2,0x22,0x07,0x01,0x00,0x17,0x00,0x89,0xd2,0x23,0x07,0x01,0x00,0x18,0x00,0x89,0xd2,0x22,0x09,0x01
,0x00,0x19,0x00,0x89,0xd2,0x23,0x09,0x01,0x00,0x1a,0x00,0x89,0xd2,0x22,0x0b,0x01,0x00,0x1b,0x00,0x89,0xd2,0x23,0x0b,0x01,0x00,0x1c,0x00,0x89,0xd2,0x22,0x0d,0x01
,0x00,0x1d,0x00,0x89,0xd2,0x23,0x0d,0x01,0x00,0x1e,0x00,0x89,0xd2,0x22,0x0f,0x01,0x00,0x1f,0x00,0x89,0xd2,0x23,0x0f,0x01,0x00,0x7f,0x00,0x8c,0xbf,0x04,0x1e,0x8c
,0xbe,0x08,0x0a,0x9c,0xd8,0x29,0x3c,0x3e,0x00,0x0c,0x0e,0x9c,0xd8,0x29,0x40,0x42,0x00,0x10,0x12,0x9c,0xd8,0x29,0x44,0x46,0x00,0x14,0x16,0x9c,0xd8,0x29,0x48,0x4a
,0x00,0x1c,0x00,0x8a,0xd2,0x10,0x00,0x01,0x00,0x1d,0x00,0x8a,0xd2,0x11,0x00,0x01,0x00,0x1c,0x00,0x8a,0xd2,0x12,0x02,0x01,0x00,0x1d,0x00,0x8a,0xd2,0x13,0x02,0x01
,0x00,0x1c,0x00,0x8a,0xd2,0x14,0x04,0x01,0x00,0x1d,0x00,0x8a,0xd2,0x15,0x04,0x01,0x00,0x1c,0x00,0x8a,0xd2,0x16,0x06,0x01,0x00,0x1d,0x00,0x8a,0xd2,0x17,0x06,0x01
,0x00,0x1c,0x00,0x8a,0xd2,0x18,0x08,0x01,0x00,0x1d,0x00,0x8a,0xd2,0x19,0x08,0x01,0x00,0x1c,0x00,0x8a,0xd2,0x1a,0x0a,0x01,0x00,0x1d,0x00,0x8a,0xd2,0x1b,0x0a,0x01
,0x00,0x1c,0x00,0x8a,0xd2,0x1c,0x0c,0x01,0x00,0x1d,0x00,0x8a,0xd2,0x1d,0x0c,0x01,0x00,0x1c,0x00,0x8a,0xd2,0x1e,0x0e,0x01,0x00,0x1d,0x00,0x8a,0xd2,0x1f,0x0e,0x01
,0x00,0x24,0x01,0xfe,0xbe,0x00,0x00,0x9a,0xd8,0x11,0x1c,0x00,0x00,0x7f,0x00,0x8c,0xbf,0x1c,0x2b,0x2a,0x2a,0x1d,0x2d,0x2c,0x2a,0x00,0x00,0x6c,0xd8,0x07,0x00,0x00
,0x1c,0x00,0x00,0x6c,0xd8,0x08,0x00,0x00,0x1d,0x00,0x00,0x9a,0xd8,0x11,0x15,0x00,0x00,0x7f,0x01,0x8c,0xbf,0x08,0x10,0xee,0xd8,0x11,0x00,0x00,0x1e,0x1c,0x4b,0x14
,0x2a,0x7f,0x00,0x8c,0xbf,0x20,0x3d,0x3c,0x2a,0x21,0x3f,0x3e,0x2a,0x0a,0x3b,0x14,0x2a,0x00,0x00,0x74,0xdc,0x1a,0x15,0x00,0x00,0xff,0x14,0x14,0x26,0xc0,0xff,0xff
,0x7f,0x00,0x00,0x74,0xdc,0x17,0x1e,0x00,0x00,0x02,0x80,0x06,0xbf,0x06,0x00,0x85,0xbf,0x02,0x81,0x82,0x81,0x24,0x03,0x4a,0x7e,0x80,0x02,0x2e,0x7e,0x0a,0x03,0x48
Expand Down Expand Up @@ -215,4 +215,4 @@ static unsigned char randomx_run_gfx803_bin[]={
,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x2c,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x62
,0x08,0x00,0x00,0x00,0x00,0x00,0x00,0x80,0x0f,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
,0x00,0x00,0x00,0x00,0x00,0x00,0x00};
static const int randomx_run_gfx803_bin_size=6568;
const int randomx_run_gfx803_bin_size=6568;
22 changes: 11 additions & 11 deletions src/amd/opencl/RandomX/randomx_run_gfx900.asm
Original file line number Diff line number Diff line change
Expand Up @@ -345,12 +345,13 @@ main_loop:

v_add_co_u32 v22, vcc, v6, v36
v_addc_co_u32 v25, vcc, v20, 0, vcc
v_add_co_u32 v21, vcc, v22, v1
v_addc_co_u32 v22, vcc, v25, 0, vcc
global_load_dwordx2 v[21:22], v[21:22], off
v_or_b32 v30, v32, v13
v_and_or_b32 v31, v33, v19, v14
v_or_b32 v28, v28, v15
v_and_or_b32 v29, v29, v19, v16
v_add_co_u32 v21, vcc, v22, v1
v_addc_co_u32 v22, vcc, v25, 0, vcc
ds_write2_b64 v5, v[30:31], v[28:29] offset1:1
s_waitcnt lgkmcnt(0)

Expand Down Expand Up @@ -387,6 +388,13 @@ main_loop:
# call JIT code
s_swappc_b64 s[12:13], s[4:5]

# Write out group F,E registers
# Write low 8 bytes from lane 0 and high 8 bytes from lane 1
ds_write2_b64 v41, v[60:61], v[62:63] offset0:8 offset1:10
ds_write2_b64 v41, v[64:65], v[66:67] offset0:12 offset1:14
ds_write2_b64 v41, v[68:69], v[70:71] offset0:16 offset1:18
ds_write2_b64 v41, v[72:73], v[74:75] offset0:20 offset1:22

# store VM integer registers
v_writelane_b32 v28, s16, 0
v_writelane_b32 v29, s17, 0
Expand All @@ -405,21 +413,13 @@ main_loop:
v_writelane_b32 v28, s30, 7
v_writelane_b32 v29, s31, 7

# Write out group F,E registers
# Write low 8 bytes from lane 0 and high 8 bytes from lane 1
ds_write2_b64 v41, v[60:61], v[62:63] offset0:8 offset1:10
ds_write2_b64 v41, v[64:65], v[66:67] offset0:12 offset1:14
ds_write2_b64 v41, v[68:69], v[70:71] offset0:16 offset1:18
ds_write2_b64 v41, v[72:73], v[74:75] offset0:20 offset1:22

# Restore execution mask
s_mov_b64 exec, s[36:37]

# Write out VM integer registers
ds_write_b64 v17, v[28:29]

global_load_dwordx2 v[21:22], v[21:22], off
s_waitcnt vmcnt(0) & lgkmcnt(0)
s_waitcnt lgkmcnt(0)
v_xor_b32 v21, v28, v21
v_xor_b32 v22, v29, v22
ds_read_b32 v28, v7
Expand Down
Loading

0 comments on commit b23d687

Please sign in to comment.