Early subsumption complete

Signed-off-by: gothictomato <gothictomato@pm.me>
This commit is contained in:
gothictomato
2022-08-21 12:49:48 -04:00
parent a1b6cdaea9
commit 4c111f4b6f
8 changed files with 277 additions and 323 deletions

View File

@@ -1,9 +1,11 @@
#include "gpusolver.h"
#include <CL/cl.h>
#include "time.h"
#include "gmp.h"
#define GLOBAL_SIZE (256)
#define LOCAL_SIZE (GLOBAL_SIZE)
#define LOCAL_SIZE (128)
#define GLOBAL_SIZE (1024)
#define CHECKASGN (true)
@@ -28,7 +30,7 @@ i32 gpusolve(cnf* c) {
source_size = fread( source_str, 1, 0x100000, fp);
fclose( fp );
u32 wordcnt = 1 + ((c->varcnt) >> 5U);
u32 wordcnt = 1 + ((c->cnts[0]) >> 5U);
u32* solution = calloc((wordcnt + 1), sizeof(u32));
if (solution == NULL) {
@@ -36,6 +38,13 @@ i32 gpusolve(cnf* c) {
exit(1);
}
mpz_t gmpmax;
mpz_init(gmpmax);
mpz_ui_pow_ui(gmpmax, 2, c->cnts[0]);
mpz_div_ui(gmpmax, gmpmax, GLOBAL_SIZE);
mpz_export(solution + 1, NULL, -1, sizeof(u32), 0, 0, gmpmax);
mpz_clear(gmpmax);
cl_int res = clGetPlatformIDs(1, &platformid, &numplatforms);
if (res != CL_SUCCESS) {
printf("Failed to retrieve OpenCL platform IDs\n");
@@ -74,38 +83,27 @@ i32 gpusolve(cnf* c) {
*/
// TODO: Look into DMA, maybe? Could do clause learning CPU-side and just update the GPU buffer
cl_mem gpuheader = clCreateBuffer(context, CL_MEM_READ_ONLY, 3 * sizeof(cl_uint), NULL, &res);
cl_mem gpuheader = clCreateBuffer(context, CL_MEM_READ_ONLY, 2 * sizeof(cl_uint), NULL, &res);
if (res != CL_SUCCESS) {
printf("Failed to create CNF header buffer\n");
exit(1);
}
cl_mem gpulvars = clCreateBuffer(context, CL_MEM_READ_ONLY, c->clausecnt * sizeof(cl_uint), NULL, &res);
cl_mem gpulvars = clCreateBuffer(context, CL_MEM_READ_ONLY, 3 * c->cnts[1] * sizeof(cl_uint), NULL, &res);
if (res != CL_SUCCESS) {
printf("Failed to create CNF lvar buffer\n");
exit(1);
}
cl_mem gpuvariables = clCreateBuffer(context, CL_MEM_READ_ONLY, c->litcnt * sizeof(cl_uint), NULL, &res);
cl_mem gpuvariables = clCreateBuffer(context, CL_MEM_READ_ONLY, c->cnts[2] * sizeof(cl_uint), NULL, &res);
if (res != CL_SUCCESS) {
printf("Failed to create CNF variable buffer\n");
exit(1);
}
cl_mem gpuclauses = clCreateBuffer(context, CL_MEM_READ_ONLY, c->litcnt * sizeof(cl_uint), NULL, &res);
if (res != CL_SUCCESS) {
printf("Failed to create CNF clause buffer\n");
exit(1);
}
cl_mem gpuparities = clCreateBuffer(context, CL_MEM_READ_ONLY, c->litcnt * sizeof(cl_uchar), NULL, &res);
cl_mem gpuparities = clCreateBuffer(context, CL_MEM_READ_ONLY, c->cnts[2] * sizeof(cl_uchar), NULL, &res);
if (res != CL_SUCCESS) {
printf("Failed to create CNF parity buffer\n");
exit(1);
}
// Allocate scratchpad memory
cl_mem gpuscratchpad = clCreateBuffer(context, CL_MEM_READ_WRITE, c->clausecnt * sizeof(cl_uchar), NULL, &res);
if (res != CL_SUCCESS) {
printf("Failed to create CNF subsumption scratchpad buffer\n");
exit(1);
}
/*
cl_mem gpumaxvals = clCreateBuffer(context, CL_MEM_READ_WRITE, GLOBAL_SIZE * sizeof(cl_uint), NULL, &res);
if (res != CL_SUCCESS) {
@@ -120,30 +118,29 @@ i32 gpusolve(cnf* c) {
exit(1);
}
u32 cnfheader[3] = { c->litcnt, c->varcnt, c->clausecnt };
// Load buffers to GPU
res = clEnqueueWriteBuffer(commqueue, gpuheader, CL_TRUE, 0, 3 * sizeof(cl_uint), cnfheader, 0, NULL, NULL);
res = clEnqueueWriteBuffer(commqueue, gpuheader, CL_TRUE, 0, 2 * sizeof(cl_uint), c->cnts, 0, NULL, NULL);
if (res != CL_SUCCESS) {
printf("Failed to queue CNF header write\n");
exit(1);
}
res = clEnqueueWriteBuffer(commqueue, gpulvars, CL_TRUE, 0, c->clausecnt * sizeof(cl_uint), c->lastvars, 0, NULL, NULL);
res = clEnqueueWriteBuffer(commqueue, gpulvars, CL_TRUE, 0, 3 * c->cnts[1] * sizeof(cl_uint), c->clausedat, 0, NULL, NULL);
if (res != CL_SUCCESS) {
printf("Failed to queue CNF lvar write\n");
exit(1);
}
res = clEnqueueWriteBuffer(commqueue, gpuvariables, CL_TRUE, 0, c->litcnt * sizeof(cl_uint), c->variables, 0, NULL, NULL);
res = clEnqueueWriteBuffer(commqueue, gpuvariables, CL_TRUE, 0, c->cnts[2] * sizeof(cl_uint), c->variables, 0, NULL, NULL);
if (res != CL_SUCCESS) {
printf("Failed to queue CNF variable write\n");
exit(1);
}
res = clEnqueueWriteBuffer(commqueue, gpuclauses, CL_TRUE, 0, c->litcnt * sizeof(cl_uint), c->clauses, 0, NULL, NULL);
res = clEnqueueWriteBuffer(commqueue, gpuparities, CL_TRUE, 0, c->cnts[2] * sizeof(cl_uchar), c->parities, 0, NULL, NULL);
if (res != CL_SUCCESS) {
printf("Failed to queue CNF clause write\n");
printf("Failed to queue CNF parity write\n");
exit(1);
}
res = clEnqueueWriteBuffer(commqueue, gpuparities, CL_TRUE, 0, c->litcnt * sizeof(cl_uchar), c->pars, 0, NULL, NULL);
res = clEnqueueWriteBuffer(commqueue, gpuoutput, CL_TRUE, 0, (wordcnt + 1) * sizeof(cl_uint), solution, 0, NULL, NULL);
if (res != CL_SUCCESS) {
printf("Failed to queue CNF parity write\n");
exit(1);
@@ -179,18 +176,16 @@ i32 gpusolve(cnf* c) {
size_t maxworkgrpu = 0;
res = clGetKernelWorkGroupInfo(kernel, deviceid, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &maxworkgrpu, NULL);
printf("Max work group size: %lu\n", maxworkgrpu);
// printf("Max work group size: %lu\n", maxworkgrpu);
res = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void*) &gpuheader);
res = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void*) &gpulvars);
res = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void*) &gpuvariables);
res = clSetKernelArg(kernel, 3, sizeof(cl_mem), (void*) &gpuclauses);
res = clSetKernelArg(kernel, 4, sizeof(cl_mem), (void*) &gpuparities);
res = clSetKernelArg(kernel, 3, sizeof(cl_mem), (void*) &gpuparities);
res = clSetKernelArg(kernel, 5, sizeof(cl_mem), (void*) &gpuoutput);
res = clSetKernelArg(kernel, 4, sizeof(cl_mem), (void*) &gpuoutput);
res = clSetKernelArg(kernel, 6, sizeof(cl_mem), (void*) &gpuscratchpad);
res = clSetKernelArg(kernel, 7, LOCAL_SIZE * sizeof(cl_uint), NULL);
res = clSetKernelArg(kernel, 5, 2 * wordcnt * sizeof(cl_uint) * LOCAL_SIZE, NULL);
// u64 starttime = utime();
size_t itemsize[2] = {GLOBAL_SIZE, LOCAL_SIZE };
@@ -207,34 +202,39 @@ i32 gpusolve(cnf* c) {
}
// u64 endtime = utime();
if (solution[0] == 1) {
if (solution[0] == 0) {
printf("UNSAT\n");
} else if (solution[0] == 0) {
printf("SAT\n");
for (u32 k = 0; k < c->varcnt; ++k) {
u32 vind = (c->varcnt - 1) - k;
} else if (solution[0] == 1) {
printf("SAT: ");
for (u32 k = 0; k < c->cnts[0]; ++k) {
u32 vind = (c->cnts[0] - 1) - k;
u32 iind = vind >> 5U;
u32 bind = vind & 0b11111U;
u8 par = (solution[iind + 1] >> bind) & 1U;
printf("%u", par);
}
printf("\n");
if (CHECKASGN) {
u8* assigncheck = calloc(c->clausecnt, sizeof(u8));
for (u32 i = 0; i < c->litcnt; ++i) {
u32 g = ((c->varcnt - 1) - c->variables[i]) >> 5U;
u32 h = ((c->varcnt - 1) - c->variables[i]) & 0b11111U;
u8 paract = (solution[g + 1] >> h) & 1U;
if (c->pars[i] == paract) assigncheck[c->clauses[i]] = true;
}
for (u32 i = 0; i < c->clausecnt; ++i) {
if (!assigncheck[i]) {
printf("Failed assignment check\n");
solution[0] = 4;
u8 checkres = 0;
for (u32 i = 0; i < c->cnts[1]; ++i) {
checkres = 0;
for (u32 j = 0; j < c->clausedat[3 * i + 1]; ++j) {
u32 v = c->variables[c->clausedat[3 * i] + j];
u32 vv = c->cnts[0] - 1;
u32 g = (vv - v) >> 5U;
u32 h = (vv - v) & 0b11111U;
u8 paract = (solution[g + 1] >> h) & 1U;
if (c->parities[c->clausedat[3 * i] + j] == paract) {
checkres = 1;
break;
}
}
if (!checkres) break;
}
if (checkres) {
printf(" \xE2\x9C\x93\n");
} else {
printf(" -\n");
}
free(assigncheck);
printf("Passed assignment check\n");
}
} else {
printf("What the fuck???\n");
@@ -253,16 +253,14 @@ i32 gpusolve(cnf* c) {
res = clReleaseMemObject(gpuheader);
res = clReleaseMemObject(gpulvars);
res = clReleaseMemObject(gpuvariables);
res = clReleaseMemObject(gpuclauses);
res = clReleaseMemObject(gpuparities);
res = clReleaseMemObject(gpuoutput);
res = clReleaseMemObject(gpuscratchpad);
res = clReleaseDevice(deviceid);
i32 retval = solution[0];
i32 retval = (i32) solution[0];
free(solution);
free(source_str);
return retval;
}
}