267 lines
8.6 KiB
C
267 lines
8.6 KiB
C
#include "gpusolver.h"
|
|
#include <CL/cl.h>
|
|
#include "time.h"
|
|
#include "gmp.h"
|
|
|
|
|
|
#define LOCAL_SIZE (128)
|
|
#define GLOBAL_SIZE (1024)
|
|
|
|
#define CHECKASGN (true)
|
|
|
|
#define DEBUG
|
|
|
|
i32 gpusolve(cnf* c) {
|
|
cl_platform_id platformid = NULL;
|
|
cl_device_id deviceid = NULL;
|
|
cl_uint numdevices;
|
|
cl_uint numplatforms;
|
|
|
|
FILE *fp;
|
|
char *source_str;
|
|
size_t source_size;
|
|
|
|
fp = fopen("../psat.cl", "r");
|
|
if (!fp) {
|
|
fprintf(stderr, "Failed to load kernel.\n");
|
|
exit(1);
|
|
}
|
|
source_str = (char*)malloc(0x100000);
|
|
source_size = fread( source_str, 1, 0x100000, fp);
|
|
fclose( fp );
|
|
|
|
u32 wordcnt = 1 + ((c->cnts[0]) >> 5U);
|
|
|
|
u32* solution = calloc((wordcnt + 1), sizeof(u32));
|
|
if (solution == NULL) {
|
|
printf("Failed to allocate solution buffer\n");
|
|
exit(1);
|
|
}
|
|
|
|
mpz_t gmpmax;
|
|
mpz_init(gmpmax);
|
|
mpz_ui_pow_ui(gmpmax, 2, c->cnts[0]);
|
|
mpz_div_ui(gmpmax, gmpmax, GLOBAL_SIZE);
|
|
mpz_export(solution + 1, NULL, -1, sizeof(u32), 0, 0, gmpmax);
|
|
mpz_clear(gmpmax);
|
|
|
|
cl_int res = clGetPlatformIDs(1, &platformid, &numplatforms);
|
|
if (res != CL_SUCCESS) {
|
|
printf("Failed to retrieve OpenCL platform IDs\n");
|
|
exit(1);
|
|
}
|
|
// printf("Found %u platforms\n", numplatforms);
|
|
res = clGetDeviceIDs(platformid, CL_DEVICE_TYPE_GPU, 1, &deviceid, &numdevices);
|
|
if (res != CL_SUCCESS) {
|
|
printf("Failed to retrieve OpenCL device IDs\n");
|
|
exit(1);
|
|
}
|
|
// printf("Found %u devices\n", numdevices);
|
|
|
|
cl_context context = clCreateContext(NULL, 1, &deviceid, NULL, NULL, &res);
|
|
if (res != CL_SUCCESS) {
|
|
printf("Failed to create OpenCL context\n");
|
|
exit(1);
|
|
}
|
|
|
|
cl_command_queue commqueue = clCreateCommandQueueWithProperties(context, deviceid, 0, &res);
|
|
if (res != CL_SUCCESS) {
|
|
printf("Failed to create OpenCL command queue\n");
|
|
exit(1);
|
|
}
|
|
|
|
// Device memory buffers:
|
|
/* For the CNF:
|
|
* {clausecnt, literalcnt, varcnt)
|
|
* variable array
|
|
* clause array
|
|
* parity array
|
|
*
|
|
* Other:
|
|
* Status
|
|
* A single counter
|
|
*/
|
|
|
|
// TODO: Look into DMA, maybe? Could do clause learning CPU-side and just update the GPU buffer
|
|
cl_mem gpuheader = clCreateBuffer(context, CL_MEM_READ_ONLY, 2 * sizeof(cl_uint), NULL, &res);
|
|
if (res != CL_SUCCESS) {
|
|
printf("Failed to create CNF header buffer\n");
|
|
exit(1);
|
|
}
|
|
cl_mem gpulvars = clCreateBuffer(context, CL_MEM_READ_ONLY, 3 * c->cnts[1] * sizeof(cl_uint), NULL, &res);
|
|
if (res != CL_SUCCESS) {
|
|
printf("Failed to create CNF lvar buffer\n");
|
|
exit(1);
|
|
}
|
|
cl_mem gpuvariables = clCreateBuffer(context, CL_MEM_READ_ONLY, c->cnts[2] * sizeof(cl_uint), NULL, &res);
|
|
if (res != CL_SUCCESS) {
|
|
printf("Failed to create CNF variable buffer\n");
|
|
exit(1);
|
|
}
|
|
cl_mem gpuparities = clCreateBuffer(context, CL_MEM_READ_ONLY, c->cnts[2] * sizeof(cl_uchar), NULL, &res);
|
|
if (res != CL_SUCCESS) {
|
|
printf("Failed to create CNF parity buffer\n");
|
|
exit(1);
|
|
}
|
|
|
|
/*
|
|
cl_mem gpumaxvals = clCreateBuffer(context, CL_MEM_READ_WRITE, GLOBAL_SIZE * sizeof(cl_uint), NULL, &res);
|
|
if (res != CL_SUCCESS) {
|
|
printf("Failed to create CNF maxval buffer\n");
|
|
exit(1);
|
|
}
|
|
*/
|
|
|
|
cl_mem gpuoutput = clCreateBuffer(context, CL_MEM_READ_WRITE, (wordcnt + 1) * sizeof(cl_uint), NULL, &res);
|
|
if (res != CL_SUCCESS) {
|
|
printf("Failed to create output buffer\n");
|
|
exit(1);
|
|
}
|
|
|
|
// Load buffers to GPU
|
|
res = clEnqueueWriteBuffer(commqueue, gpuheader, CL_TRUE, 0, 2 * sizeof(cl_uint), c->cnts, 0, NULL, NULL);
|
|
if (res != CL_SUCCESS) {
|
|
printf("Failed to queue CNF header write\n");
|
|
exit(1);
|
|
}
|
|
res = clEnqueueWriteBuffer(commqueue, gpulvars, CL_TRUE, 0, 3 * c->cnts[1] * sizeof(cl_uint), c->clausedat, 0, NULL, NULL);
|
|
if (res != CL_SUCCESS) {
|
|
printf("Failed to queue CNF lvar write\n");
|
|
exit(1);
|
|
}
|
|
res = clEnqueueWriteBuffer(commqueue, gpuvariables, CL_TRUE, 0, c->cnts[2] * sizeof(cl_uint), c->variables, 0, NULL, NULL);
|
|
if (res != CL_SUCCESS) {
|
|
printf("Failed to queue CNF variable write\n");
|
|
exit(1);
|
|
}
|
|
res = clEnqueueWriteBuffer(commqueue, gpuparities, CL_TRUE, 0, c->cnts[2] * sizeof(cl_uchar), c->parities, 0, NULL, NULL);
|
|
if (res != CL_SUCCESS) {
|
|
printf("Failed to queue CNF parity write\n");
|
|
exit(1);
|
|
}
|
|
|
|
res = clEnqueueWriteBuffer(commqueue, gpuoutput, CL_TRUE, 0, (wordcnt + 1) * sizeof(cl_uint), solution, 0, NULL, NULL);
|
|
if (res != CL_SUCCESS) {
|
|
printf("Failed to queue CNF parity write\n");
|
|
exit(1);
|
|
}
|
|
|
|
cl_program satprog = clCreateProgramWithSource(context, 1, (const char**) &source_str, (const size_t*) &source_size, &res);
|
|
if (res != CL_SUCCESS) {
|
|
printf("Failed to create OpenCL program\n");
|
|
exit(1);
|
|
}
|
|
|
|
res = clBuildProgram(satprog, 1, &deviceid, NULL, NULL, NULL);
|
|
if (res != CL_SUCCESS) {
|
|
char* logbuf = malloc(sizeof(char) * 65536);
|
|
size_t loglen = 0;
|
|
res = clGetProgramBuildInfo(satprog, deviceid, CL_PROGRAM_BUILD_LOG, sizeof(char) * 65536, logbuf, &loglen);
|
|
if (res != CL_SUCCESS) {
|
|
printf("Failed to retrieve build logs\n");
|
|
exit(1);
|
|
}
|
|
printf("Build failed\n");
|
|
printf("%s\n", logbuf);
|
|
free(logbuf);
|
|
exit(1);
|
|
}
|
|
|
|
cl_kernel kernel = clCreateKernel(satprog, "vectorSAT", &res);
|
|
if (res != CL_SUCCESS) {
|
|
printf("Failed to create kernel\n");
|
|
exit(1);
|
|
}
|
|
|
|
size_t maxworkgrpu = 0;
|
|
res = clGetKernelWorkGroupInfo(kernel, deviceid, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &maxworkgrpu, NULL);
|
|
|
|
// printf("Max work group size: %lu\n", maxworkgrpu);
|
|
|
|
res = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void*) &gpuheader);
|
|
res = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void*) &gpulvars);
|
|
res = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void*) &gpuvariables);
|
|
res = clSetKernelArg(kernel, 3, sizeof(cl_mem), (void*) &gpuparities);
|
|
|
|
res = clSetKernelArg(kernel, 4, sizeof(cl_mem), (void*) &gpuoutput);
|
|
|
|
res = clSetKernelArg(kernel, 5, 2 * wordcnt * sizeof(cl_uint) * LOCAL_SIZE, NULL);
|
|
|
|
// u64 starttime = utime();
|
|
size_t itemsize[2] = {GLOBAL_SIZE, LOCAL_SIZE };
|
|
res = clEnqueueNDRangeKernel(commqueue, kernel, 1, NULL, itemsize, itemsize + 1, 0, NULL, NULL);
|
|
if (res != CL_SUCCESS) {
|
|
printf("Failed to queue kernel for execution\n");
|
|
exit(res);
|
|
}
|
|
|
|
res = clEnqueueReadBuffer(commqueue, gpuoutput, CL_TRUE, 0, (wordcnt + 1) * sizeof(cl_uint), solution, 0, NULL, NULL);
|
|
if (res != CL_SUCCESS) {
|
|
printf("Failed to read kernel output\n");
|
|
exit(1);
|
|
}
|
|
// u64 endtime = utime();
|
|
|
|
if (solution[0] == 0) {
|
|
printf("UNSAT\n");
|
|
} else if (solution[0] == 1) {
|
|
printf("SAT: ");
|
|
for (u32 k = 0; k < c->cnts[0]; ++k) {
|
|
u32 vind = (c->cnts[0] - 1) - k;
|
|
u32 iind = vind >> 5U;
|
|
u32 bind = vind & 0b11111U;
|
|
u8 par = (solution[iind + 1] >> bind) & 1U;
|
|
printf("%u", par);
|
|
}
|
|
if (CHECKASGN) {
|
|
u8 checkres = 0;
|
|
for (u32 i = 0; i < c->cnts[1]; ++i) {
|
|
checkres = 0;
|
|
for (u32 j = 0; j < c->clausedat[3 * i + 1]; ++j) {
|
|
u32 v = c->variables[c->clausedat[3 * i] + j];
|
|
u32 vv = c->cnts[0] - 1;
|
|
u32 g = (vv - v) >> 5U;
|
|
u32 h = (vv - v) & 0b11111U;
|
|
u8 paract = (solution[g + 1] >> h) & 1U;
|
|
if (c->parities[c->clausedat[3 * i] + j] == paract) {
|
|
checkres = 1;
|
|
break;
|
|
}
|
|
}
|
|
if (!checkres) break;
|
|
}
|
|
if (checkres) {
|
|
printf(" \xE2\x9C\x93\n");
|
|
} else {
|
|
printf(" -\n");
|
|
}
|
|
}
|
|
} else {
|
|
printf("What the fuck???\n");
|
|
solution[0] = 3;
|
|
}
|
|
// printf("Actual time: %f seconds\n", ((f64) (endtime - starttime)) / 1000000.0);
|
|
|
|
res = clFlush(commqueue);
|
|
res = clFinish(commqueue);
|
|
res = clReleaseKernel(kernel);
|
|
res = clReleaseProgram(satprog);
|
|
|
|
res = clReleaseCommandQueue(commqueue);
|
|
res = clReleaseContext(context);
|
|
|
|
res = clReleaseMemObject(gpuheader);
|
|
res = clReleaseMemObject(gpulvars);
|
|
res = clReleaseMemObject(gpuvariables);
|
|
res = clReleaseMemObject(gpuparities);
|
|
res = clReleaseMemObject(gpuoutput);
|
|
|
|
res = clReleaseDevice(deviceid);
|
|
|
|
i32 retval = (i32) solution[0];
|
|
free(solution);
|
|
free(source_str);
|
|
|
|
return retval;
|
|
}
|