Initial commit.
This commit is contained in:
403
gpusolver.c
Normal file
403
gpusolver.c
Normal file
@@ -0,0 +1,403 @@
|
||||
#include "gpusolver.h"
|
||||
#include <CL/cl.h>
|
||||
#include "time.h"
|
||||
|
||||
|
||||
static const char kernel_source[4560] = "static inline void stateaddpow(uint wcnt, uint* state, uint pow) {\n"
|
||||
" uint corpow = pow & 0b11111U;\n"
|
||||
" uint startind = pow >> 5U;\n"
|
||||
" uint tr = 1U << corpow;\n"
|
||||
" uint tval = state[startind] + tr;\n"
|
||||
" uchar choice = !((tval > state[startind]) && (tval >= tr));\n"
|
||||
" state[startind] = tval;\n"
|
||||
" for (uint i = 0; i < wcnt; ++i) {\n"
|
||||
" uchar cond = (i > startind);\n"
|
||||
" state[i] += choice * cond;\n"
|
||||
" choice = choice & (state[i] == 0) * cond + (!cond) & choice;\n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__global static uint setmax;\n"
|
||||
"\n"
|
||||
"__kernel void vectorSAT(__global const uint* cnfheader, __global const uint* lvars, __global const uint* vars, __global const uint* clauses, __global const uchar* pars, __global uint* output, __global uchar* scratchpad, __global uint* maxvals) {\n"
|
||||
" output[0] = 2;\n"
|
||||
"\n"
|
||||
" uint cnt = cnfheader[0];\n"
|
||||
" uint vcnt = cnfheader[1];\n"
|
||||
" uint ccnt = cnfheader[2];\n"
|
||||
"\n"
|
||||
" uint wcnt = 1 + (vcnt >> 5U);\n"
|
||||
"\n"
|
||||
" // Zero out the counter\n"
|
||||
" for (uint i = 0; i < wcnt; ++i) output[i + 1] = 0;\n"
|
||||
"\n"
|
||||
" uint maxctr = 1U << (vcnt & 0b11111U);\n"
|
||||
"\n"
|
||||
" uint glbid = get_global_id(0);\n"
|
||||
" uint glbsz = get_global_size(0);\n"
|
||||
"\n"
|
||||
" /*\n"
|
||||
" uint locid = get_local_id(0);\n"
|
||||
" uint locsz = get_local_size(0);\n"
|
||||
" uint grpid = get_group_id(0);\n"
|
||||
" uint grpcn = get_num_groups(0);\n"
|
||||
" */\n"
|
||||
"\n"
|
||||
" bool done = false;\n"
|
||||
" uint iter = 0;\n"
|
||||
" while (output[0] == 2) {\n"
|
||||
" // if (glbid == 0) printf(\"%s\\n\", \":~\");\n"
|
||||
" setmax = 0;\n"
|
||||
" uint maxnumx = 0;\n"
|
||||
"\n"
|
||||
" // Set all scratchpad clauses to true\n"
|
||||
" for (uint j = 0; j < ccnt; j += glbsz) {\n"
|
||||
" //uchar cond = (j + glbid) < ccnt;\n"
|
||||
" // If ptr would go past end of array, set it to last element\n"
|
||||
" // j = j * cond + (!cond) * (ccnt - glbid - 1);\n"
|
||||
" if ((j + glbid) < ccnt) scratchpad[j + glbid] = 1;\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" barrier(CLK_GLOBAL_MEM_FENCE);\n"
|
||||
"\n"
|
||||
" for (uint j = 0; j < cnt; j += glbsz) {\n"
|
||||
" // uchar cond = (j + glbid) < cnt;\n"
|
||||
" // Last element cap\n"
|
||||
" // j = j * cond + (!cond) * (cnt - glbid - 1);\n"
|
||||
" if ((j + glbid) < cnt) {\n"
|
||||
" uint varind = vars[j + glbid];\n"
|
||||
" varind = (vcnt - 1) - varind;\n"
|
||||
" uint iind = varind >> 5U;\n"
|
||||
" uint bind = varind & 0b11111U;\n"
|
||||
" uchar cpar = (output[iind + 1] >> bind) & 1U;\n"
|
||||
" if (cpar != pars[j + glbid]) scratchpad[clauses[j + glbid]] = 0;\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" barrier(CLK_GLOBAL_MEM_FENCE);\n"
|
||||
"\n"
|
||||
"\n"
|
||||
" for (uint j = 0; j < ccnt; j += glbsz) {\n"
|
||||
" if (((j + glbid) < ccnt) && (scratchpad[j + glbid] == 1)) {\n"
|
||||
" setmax = 1;\n"
|
||||
" // printf(\"%u\\n\", (~output[1]) & 0b11111);\n"
|
||||
" // printf(\"%u%u%u%u%u\\n\", (output[1] >> 4) & 1U, (output[1] >> 3) & 1U, (output[1] >> 2) & 1U, (output[1] >> 1) & 1U, output[1] & 1U);\n"
|
||||
" // printf(\"%u - %u\\n\", j + glbid, scratchpad[j + glbid]);\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" // uchar cond = (j + glbid) < cnt;\n"
|
||||
" // Last element cap\n"
|
||||
" // j = j * cond + (!cond) * (cnt - glbid - 1);\n"
|
||||
" // if (scratchpad[j + glbid] == 1) setmax = true;\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" barrier(CLK_GLOBAL_MEM_FENCE);\n"
|
||||
"\n"
|
||||
" if (setmax) {\n"
|
||||
" // Set maxval array to zero\n"
|
||||
" maxvals[glbid] = 0;\n"
|
||||
"\n"
|
||||
" barrier(CLK_GLOBAL_MEM_FENCE);\n"
|
||||
"\n"
|
||||
" // Accumulate and reduce the maximums\n"
|
||||
" for (uint j = 0; j < ccnt; j += glbsz) {\n"
|
||||
" uint a = maxvals[glbid];\n"
|
||||
" uint b = scratchpad[j + glbid] * lvars[j + glbid];\n"
|
||||
" uint c = max(a, b);\n"
|
||||
" if ((j + glbid) < ccnt) maxvals[glbid] = c;\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" barrier(CLK_GLOBAL_MEM_FENCE);\n"
|
||||
"\n"
|
||||
" // Final reduction pass\n"
|
||||
" uint maxj = maxvals[0];\n"
|
||||
" for (uint j = 1; j < glbsz; ++j) {\n"
|
||||
" maxj = max(maxj, maxvals[j]);\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" // Add to the counter\n"
|
||||
" if (glbid == 0) {\n"
|
||||
" // printf(\"> %u\\n\", maxj);\n"
|
||||
" stateaddpow(wcnt, output + 1, maxj);\n"
|
||||
" // printf(\">> %u%u%u%u%u\\n\", (output[1] >> 4) & 1U, (output[1] >> 3) & 1U, (output[1] >> 2) & 1U, (output[1] >> 1) & 1U, output[1] & 1U);\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" // Check counter for overflow\n"
|
||||
" if (output[wcnt] >= maxctr) {\n"
|
||||
" output[0] = 1;\n"
|
||||
" return;\n"
|
||||
" }\n"
|
||||
" } else {\n"
|
||||
" // SAT. Set status and assignment.\n"
|
||||
" output[0] = 0;\n"
|
||||
" if (glbid == 0) {\n"
|
||||
" for (uint i = 0; i < wcnt; ++i) output[i + 1] = ~output[i + 1];\n"
|
||||
" }\n"
|
||||
" return;\n"
|
||||
" }\n"
|
||||
" iter++;\n"
|
||||
" }\n"
|
||||
"}";
|
||||
|
||||
|
||||
static const size_t kernel_len = 4559;
|
||||
|
||||
#define GLOBAL_SIZE (256)
|
||||
#define LOCAL_SIZE (GLOBAL_SIZE)
|
||||
|
||||
#define CHECKASGN (true)
|
||||
|
||||
#define DEBUG
|
||||
|
||||
i32 gpusolve(cnf* c) {
|
||||
cl_platform_id platformid = NULL;
|
||||
cl_device_id deviceid = NULL;
|
||||
cl_uint numdevices;
|
||||
cl_uint numplatforms;
|
||||
|
||||
FILE *fp;
|
||||
char *source_str;
|
||||
size_t source_size;
|
||||
|
||||
fp = fopen("../psat.cl", "r");
|
||||
if (!fp) {
|
||||
fprintf(stderr, "Failed to load kernel.\n");
|
||||
exit(1);
|
||||
}
|
||||
source_str = (char*)malloc(0x100000);
|
||||
source_size = fread( source_str, 1, 0x100000, fp);
|
||||
fclose( fp );
|
||||
|
||||
u32 wordcnt = 1 + ((c->varcnt) >> 5U);
|
||||
|
||||
u32* solution = calloc((wordcnt + 1), sizeof(u32));
|
||||
if (solution == NULL) {
|
||||
printf("Failed to allocate solution buffer\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
cl_int res = clGetPlatformIDs(1, &platformid, &numplatforms);
|
||||
if (res != CL_SUCCESS) {
|
||||
printf("Failed to retrieve OpenCL platform IDs\n");
|
||||
exit(1);
|
||||
}
|
||||
printf("Found %u platforms\n", numplatforms);
|
||||
res = clGetDeviceIDs(platformid, CL_DEVICE_TYPE_GPU, 1, &deviceid, &numdevices);
|
||||
if (res != CL_SUCCESS) {
|
||||
printf("Failed to retrieve OpenCL device IDs\n");
|
||||
exit(1);
|
||||
}
|
||||
printf("Found %u devices\n", numdevices);
|
||||
|
||||
cl_context context = clCreateContext(NULL, 1, &deviceid, NULL, NULL, &res);
|
||||
if (res != CL_SUCCESS) {
|
||||
printf("Failed to create OpenCL context\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
cl_command_queue commqueue = clCreateCommandQueueWithProperties(context, deviceid, 0, &res);
|
||||
if (res != CL_SUCCESS) {
|
||||
printf("Failed to create OpenCL command queue\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
// Device memory buffers:
|
||||
/* For the CNF:
|
||||
* {clausecnt, literalcnt, varcnt)
|
||||
* variable array
|
||||
* clause array
|
||||
* parity array
|
||||
*
|
||||
* Other:
|
||||
* Status
|
||||
* A single counter
|
||||
*/
|
||||
|
||||
// TODO: Look into DMA, maybe? Could do clause learning CPU-side and just update the GPU buffer
|
||||
cl_mem gpuheader = clCreateBuffer(context, CL_MEM_READ_ONLY, 3 * sizeof(cl_uint), NULL, &res);
|
||||
if (res != CL_SUCCESS) {
|
||||
printf("Failed to create CNF header buffer\n");
|
||||
exit(1);
|
||||
}
|
||||
cl_mem gpulvars = clCreateBuffer(context, CL_MEM_READ_ONLY, c->clausecnt * sizeof(cl_uint), NULL, &res);
|
||||
if (res != CL_SUCCESS) {
|
||||
printf("Failed to create CNF lvar buffer\n");
|
||||
exit(1);
|
||||
}
|
||||
cl_mem gpuvariables = clCreateBuffer(context, CL_MEM_READ_ONLY, c->litcnt * sizeof(cl_uint), NULL, &res);
|
||||
if (res != CL_SUCCESS) {
|
||||
printf("Failed to create CNF variable buffer\n");
|
||||
exit(1);
|
||||
}
|
||||
cl_mem gpuclauses = clCreateBuffer(context, CL_MEM_READ_ONLY, c->litcnt * sizeof(cl_uint), NULL, &res);
|
||||
if (res != CL_SUCCESS) {
|
||||
printf("Failed to create CNF clause buffer\n");
|
||||
exit(1);
|
||||
}
|
||||
cl_mem gpuparities = clCreateBuffer(context, CL_MEM_READ_ONLY, c->litcnt * sizeof(cl_uchar), NULL, &res);
|
||||
if (res != CL_SUCCESS) {
|
||||
printf("Failed to create CNF parity buffer\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
// Allocate scratchpad memory
|
||||
cl_mem gpuscratchpad = clCreateBuffer(context, CL_MEM_READ_WRITE, c->clausecnt * sizeof(cl_uchar), NULL, &res);
|
||||
if (res != CL_SUCCESS) {
|
||||
printf("Failed to create CNF subsumption scratchpad buffer\n");
|
||||
exit(1);
|
||||
}
|
||||
/*
|
||||
cl_mem gpumaxvals = clCreateBuffer(context, CL_MEM_READ_WRITE, GLOBAL_SIZE * sizeof(cl_uint), NULL, &res);
|
||||
if (res != CL_SUCCESS) {
|
||||
printf("Failed to create CNF maxval buffer\n");
|
||||
exit(1);
|
||||
}
|
||||
*/
|
||||
|
||||
cl_mem gpuoutput = clCreateBuffer(context, CL_MEM_READ_WRITE, (wordcnt + 1) * sizeof(cl_uint), NULL, &res);
|
||||
if (res != CL_SUCCESS) {
|
||||
printf("Failed to create output buffer\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
u32 cnfheader[3] = { c->litcnt, c->varcnt, c->clausecnt };
|
||||
|
||||
// Load buffers to GPU
|
||||
res = clEnqueueWriteBuffer(commqueue, gpuheader, CL_TRUE, 0, 3 * sizeof(cl_uint), cnfheader, 0, NULL, NULL);
|
||||
if (res != CL_SUCCESS) {
|
||||
printf("Failed to queue CNF header write\n");
|
||||
exit(1);
|
||||
}
|
||||
res = clEnqueueWriteBuffer(commqueue, gpulvars, CL_TRUE, 0, c->clausecnt * sizeof(cl_uint), c->lastvars, 0, NULL, NULL);
|
||||
if (res != CL_SUCCESS) {
|
||||
printf("Failed to queue CNF lvar write\n");
|
||||
exit(1);
|
||||
}
|
||||
res = clEnqueueWriteBuffer(commqueue, gpuvariables, CL_TRUE, 0, c->litcnt * sizeof(cl_uint), c->variables, 0, NULL, NULL);
|
||||
if (res != CL_SUCCESS) {
|
||||
printf("Failed to queue CNF variable write\n");
|
||||
exit(1);
|
||||
}
|
||||
res = clEnqueueWriteBuffer(commqueue, gpuclauses, CL_TRUE, 0, c->litcnt * sizeof(cl_uint), c->clauses, 0, NULL, NULL);
|
||||
if (res != CL_SUCCESS) {
|
||||
printf("Failed to queue CNF clause write\n");
|
||||
exit(1);
|
||||
}
|
||||
res = clEnqueueWriteBuffer(commqueue, gpuparities, CL_TRUE, 0, c->litcnt * sizeof(cl_uchar), c->pars, 0, NULL, NULL);
|
||||
if (res != CL_SUCCESS) {
|
||||
printf("Failed to queue CNF parity write\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
const char* kernelptr = kernel_source;
|
||||
|
||||
cl_program satprog = clCreateProgramWithSource(context, 1, (const char**) &source_str, (const size_t*) &source_size, &res);
|
||||
if (res != CL_SUCCESS) {
|
||||
printf("Failed to create OpenCL program\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
res = clBuildProgram(satprog, 1, &deviceid, NULL, NULL, NULL);
|
||||
if (res != CL_SUCCESS) {
|
||||
char* logbuf = malloc(sizeof(char) * 65536);
|
||||
size_t loglen = 0;
|
||||
res = clGetProgramBuildInfo(satprog, deviceid, CL_PROGRAM_BUILD_LOG, sizeof(char) * 65536, logbuf, &loglen);
|
||||
if (res != CL_SUCCESS) {
|
||||
printf("Failed to retrieve build logs\n");
|
||||
exit(1);
|
||||
}
|
||||
printf("Build failed\n");
|
||||
printf("%s\n", logbuf);
|
||||
free(logbuf);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
cl_kernel kernel = clCreateKernel(satprog, "vectorSAT", &res);
|
||||
if (res != CL_SUCCESS) {
|
||||
printf("Failed to create kernel\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
res = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void*) &gpuheader);
|
||||
res = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void*) &gpulvars);
|
||||
res = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void*) &gpuvariables);
|
||||
res = clSetKernelArg(kernel, 3, sizeof(cl_mem), (void*) &gpuclauses);
|
||||
res = clSetKernelArg(kernel, 4, sizeof(cl_mem), (void*) &gpuparities);
|
||||
|
||||
res = clSetKernelArg(kernel, 5, sizeof(cl_mem), (void*) &gpuoutput);
|
||||
|
||||
res = clSetKernelArg(kernel, 6, sizeof(cl_mem), (void*) &gpuscratchpad);
|
||||
res = clSetKernelArg(kernel, 7, LOCAL_SIZE * sizeof(cl_uint), NULL);
|
||||
|
||||
|
||||
u64 starttime = utime();
|
||||
size_t itemsize[2] = {GLOBAL_SIZE, LOCAL_SIZE };
|
||||
res = clEnqueueNDRangeKernel(commqueue, kernel, 1, NULL, itemsize, itemsize + 1, 0, NULL, NULL);
|
||||
if (res != CL_SUCCESS) {
|
||||
printf("Failed to queue kernel for execution\n");
|
||||
exit(res);
|
||||
}
|
||||
|
||||
|
||||
|
||||
res = clEnqueueReadBuffer(commqueue, gpuoutput, CL_TRUE, 0, (wordcnt + 1) * sizeof(cl_uint), solution, 0, NULL, NULL);
|
||||
if (res != CL_SUCCESS) {
|
||||
printf("Failed to read kernel output\n");
|
||||
exit(1);
|
||||
}
|
||||
u64 endtime = utime();
|
||||
|
||||
if (solution[0] == 1) {
|
||||
printf("UNSAT\n");
|
||||
} else if (solution[0] == 0) {
|
||||
printf("SAT\n");
|
||||
for (u32 k = 0; k < c->varcnt; ++k) {
|
||||
u32 vind = (c->varcnt - 1) - k;
|
||||
u32 iind = vind >> 5U;
|
||||
u32 bind = vind & 0b11111U;
|
||||
u8 par = (solution[iind + 1] >> bind) & 1U;
|
||||
printf("%u", par);
|
||||
}
|
||||
printf("\n");
|
||||
if (CHECKASGN) {
|
||||
u8* assigncheck = calloc(c->clausecnt, sizeof(u8));
|
||||
for (u32 i = 0; i < c->litcnt; ++i) {
|
||||
u32 g = ((c->varcnt - 1) - c->variables[i]) >> 5U;
|
||||
u32 h = ((c->varcnt - 1) - c->variables[i]) & 0b11111U;
|
||||
u8 paract = (solution[g + 1] >> h) & 1U;
|
||||
if (c->pars[i] == paract) assigncheck[c->clauses[i]] = true;
|
||||
}
|
||||
for (u32 i = 0; i < c->clausecnt; ++i) {
|
||||
if (!assigncheck[i]) {
|
||||
printf("Failed assignment check\n");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
free(assigncheck);
|
||||
printf("Passed assignment check\n");
|
||||
}
|
||||
} else {
|
||||
printf("What the fuck???\n");
|
||||
exit(1);
|
||||
}
|
||||
printf("Actual time: %f seconds\n", ((f64) (endtime - starttime)) / 1000000.0);
|
||||
|
||||
res = clFlush(commqueue);
|
||||
res = clFinish(commqueue);
|
||||
res = clReleaseKernel(kernel);
|
||||
res = clReleaseProgram(satprog);
|
||||
res = clReleaseMemObject(gpuheader);
|
||||
res = clReleaseMemObject(gpulvars);
|
||||
res = clReleaseMemObject(gpuvariables);
|
||||
res = clReleaseMemObject(gpuclauses);
|
||||
res = clReleaseMemObject(gpuparities);
|
||||
res = clReleaseMemObject(gpuoutput);
|
||||
res = clReleaseMemObject(gpuscratchpad);
|
||||
res = clReleaseCommandQueue(commqueue);
|
||||
res = clReleaseContext(context);
|
||||
i32 retval = solution[0];
|
||||
free(solution);
|
||||
free(source_str);
|
||||
|
||||
return retval;
|
||||
}
|
||||
Reference in New Issue
Block a user