commit 2ad31d53e329be59d3b6c6c5bff33e2209e1a421 Author: gothictomato Date: Sun Jul 17 14:03:47 2022 -0400 Initial commit. diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..d0d9663 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,9 @@ +cmake_minimum_required(VERSION 3.22) +project(psat C) + +set(CMAKE_C_STANDARD 99) +set(CMAKE_C_FLAGS "-O3") + +add_executable(psat main.c cnf.c cnf.h time.c time.h types.h gpusolver.c gpusolver.h sstree.c sstree.h cpusolver.c cpusolver.h) + +target_link_libraries(psat -lOpenCL) \ No newline at end of file diff --git a/cnf.c b/cnf.c new file mode 100644 index 0000000..cdd7059 --- /dev/null +++ b/cnf.c @@ -0,0 +1,295 @@ +#include "cnf.h" + +cnf* readDIMACS(char* path) { + cnf* c = malloc(sizeof(cnf)); + CHECK(c, "Failed to alloc CNF struct\n"); + + FILE* f = fopen(path, "r"); + CHECK(f, "Failed to open file\n"); + + u32 bufsize = 1000; + char* buf = malloc(sizeof(char) * bufsize); + CHECK(buf, "Failed to alloc read buffer\n"); + + CHECK(fgets(buf, sizeof(char) * bufsize, f), "Failed to read file\n"); + + while (buf[0] == 'c') { + CHECK(fgets(buf, sizeof(char) * bufsize, f), "Failed to read file\n"); + } + + char* temp = buf; + while (*temp == ' ' || *temp == 'p' || *temp == 'c' || *temp == 'n' || *temp == 'f') { + temp++; + } + + c->varcnt = 0; + while (((u8) (*temp - '0')) < 10) { + c->varcnt *= 10; + c->varcnt += (*temp - '0'); + temp++; + } + + while (*temp == ' ') temp++; + + c->clausecnt = 0; + while (((u8) (*temp - '0')) < 10) { + c->clausecnt *= 10; + c->clausecnt += (*temp - '0'); + temp++; + } + + u32 nsize = sizeof(char) * (c->varcnt * 12LU) + 10; + char* nbuf = realloc(buf, nsize); + CHECK(nbuf, "Failed to realloc read buffer\n"); + + buf = nbuf; + + u32 cnt = 0; + u32 cap = 32; + + c->variables = calloc( cap, sizeof(u32)); + CHECK(c->variables, "Failed to allocate literal variables\n"); + c->clauses = calloc( cap, sizeof(u32)); + CHECK(c->clauses, "Failed to allocate literal clauses\n"); + c->pars = calloc( cap, sizeof(u8)); + CHECK(c->pars, "Failed to allocate literal parities\n"); + c->lastvars = calloc( c->clausecnt, sizeof(u32)); + CHECK(c->lastvars, "Failed to allocate clause lastvars\n"); + + + for (u32 i = 0; i < c->clausecnt; ++i) { + CHECK(fgets(buf, nsize, f), "Failed to read clause\n"); + + + + temp = buf; + while (*temp == ' ') temp++; + + bool empty = true; + bool tr = true; + while (*temp != '\n') { + if (cnt == cap) { + u32 ncap = cap << 1U; + c->variables = realloc(c->variables, sizeof(u32) * ncap); + memset(c->variables + cap, 0, sizeof(u32) * cap); + CHECK(c->variables, "Failed to realloc variable array\n"); + c->clauses = realloc(c->clauses, sizeof(u32) * ncap); + memset(c->clauses + cap, 0, sizeof(u32) * cap); + CHECK(c->clauses, "Failed to realloc clause array\n"); + c->pars = realloc(c->pars, sizeof(u8) * ncap); + memset(c->pars + cap, 0, sizeof(u8) * cap); + CHECK(c->pars, "Failed to realloc parity array\n"); + cap = ncap; + } + + if (*temp == '-') { + tr = false; + } else if (((u8) (*temp - '0')) < 10) { + c->variables[cnt] *= 10; + c->variables[cnt] += (*temp - '0'); + } else { + while (temp[1] == ' ') temp++; + c->pars[cnt] = tr; + c->clauses[cnt] = i; + c->variables[cnt] -= 1; + if (c->lastvars[i] < c->variables[cnt]) c->lastvars[i] = c->variables[cnt]; + empty = false; + tr = true; + cnt++; + if (temp[1] == '0') { + break; + } + } + temp++; + } + + if (empty) { + printf("UNSAT: Empty clause %u\n", i); + return NULL; + } + } + + for (u32 i = 0; i < c->clausecnt; ++i) { + c->lastvars[i] += 1U; + c->lastvars[i] = c->varcnt - c->lastvars[i]; + } + + c->litcnt = cnt; + + c->variables = realloc(c->variables, sizeof(u32) * c->litcnt); + c->clauses = realloc(c->clauses, sizeof(u32) * c->litcnt); + c->pars = realloc(c->pars, sizeof(u8) * c->litcnt); + free(buf); + if (fclose(f)) { + printf("Failed to close file\n"); + return NULL; + } + return c; +} + +void printcnf(cnf* c) { + printf("p cnf %u %u\n", c->varcnt, c->clausecnt); + u32 pclause = 0; + for (u32 i = 0; i < c->litcnt; ++i) { + if (c->clauses[i] != pclause) printf("0\n"); + if (c->pars[i] == 0) printf("-"); + printf("%u ", c->variables[i] + 1); + pclause = c->clauses[i]; + } + printf("0\n"); +} + +void freecnf(cnf* c) { + free(c->pars); + free(c->clauses); + free(c->variables); + free(c->lastvars); + free(c); +} + +const u64 RBITS = 8; +const u64 RSIZE = 1LU << RBITS; +const u64 RLVLS = (63LU / RBITS) + 1; +const u64 RMASK = RSIZE - 1; + +// Currently sorting array of program pointers by the score of the programs +// I need to sort by lastvar, within that sort by clause + +void sortlastnum(cnf* c, u64 N) { + bool v = false; + u32* d = malloc(sizeof(u32) * c->litcnt); + u32* d2 = malloc(sizeof(u32) * c->litcnt); + u8* d3 = malloc(sizeof(u8) * c->litcnt); + + u64 qb[RLVLS * RSIZE]; + u64 qe[RLVLS * RSIZE]; + memset(qb, 0, sizeof(u64) * RLVLS * RSIZE); + memset(qe, 0, sizeof(u64) * RLVLS * RSIZE); + + for (u64 pass = 0; pass < RLVLS; ++pass) { + u64 shift = pass * RBITS; + + for (u64 i = 0; i < N; ++i) { + u32 val = UINT32_MAX - c->lastvars[(c->clausecnt - 1) - c->clauses[i]]; + u64 ind = (val >> shift) & RMASK; + qe[pass * RSIZE + ind]++; + } + + u64 uc = 0; + for (u64 i = 0; i < RSIZE; ++i) if (qe[pass * RSIZE + i]) uc++; + if (uc == 1) continue; + + qb[pass * RSIZE] = 0; + for (u64 i = 1; i < RSIZE; ++i) { + qb[pass * RSIZE + i] = qb[pass * RSIZE + i - 1] + qe[pass * RSIZE + i - 1]; + } + + for (u64 i = 0; i < N; ++i) { + u32 val = UINT32_MAX - c->lastvars[(c->clausecnt - 1) - c->clauses[i]]; + u64 ind = (val >> shift) & RMASK; + d[qb[pass * RSIZE + ind]] = c->variables[i]; + d2[qb[pass * RSIZE + ind]] = c->clauses[i]; + d3[qb[pass * RSIZE + ind]] = c->pars[i]; + qb[pass * RSIZE + ind]++; + __builtin_prefetch(d + qb[pass * RSIZE + ind] + 1); + __builtin_prefetch(d2 + qb[pass * RSIZE + ind] + 1); + __builtin_prefetch(d3 + qb[pass * RSIZE + ind] + 1); + } + + u32* tptr = c->variables; + c->variables = d; + d = tptr; + tptr = c->clauses; + c->clauses = d2; + d2 = tptr; + u8* tptr2 = c->pars; + c->pars = d3; + d3 = tptr2; + v = !v; + } + + if (v) { + u32* tptr = c->variables; + c->variables = d; + d = tptr; + tptr = c->clauses; + c->clauses = d2; + d2 = tptr; + u8* tptr2 = c->pars; + c->pars = d3; + d3 = tptr2; + memcpy(c->variables, d, N * sizeof(u32)); + memcpy(c->clauses, d2, N * sizeof(u32)); + memcpy(c->pars, d3, N * sizeof(u8)); + } + free(d); + free(d2); + free(d3); + + u32* swaparr = malloc(sizeof(u32) * c->clausecnt); + for (u32 i = 0; i < c->clausecnt; ++i) { + swaparr[i] = i; + } + + v = false; + d = malloc(sizeof(u32) * c->clausecnt); + + memset(qb, 0, sizeof(u64) * RLVLS * RSIZE); + memset(qe, 0, sizeof(u64) * RLVLS * RSIZE); + + for (u64 pass = 0; pass < RLVLS; ++pass) { + u64 shift = pass * RBITS; + + for (u64 i = 0; i < c->clausecnt; ++i) { + u32 val = UINT32_MAX - c->lastvars[swaparr[i]]; + u64 ind = (val >> shift) & RMASK; + qe[pass * RSIZE + ind]++; + } + + u64 uc = 0; + for (u64 i = 0; i < RSIZE; ++i) if (qe[pass * RSIZE + i]) uc++; + if (uc == 1) continue; + + qb[pass * RSIZE] = 0; + for (u64 i = 1; i < RSIZE; ++i) { + qb[pass * RSIZE + i] = qb[pass * RSIZE + i - 1] + qe[pass * RSIZE + i - 1]; + } + + for (u64 i = 0; i < c->clausecnt; ++i) { + u32 val = UINT32_MAX - c->lastvars[swaparr[i]]; + u64 ind = (val >> shift) & RMASK; + d[qb[pass * RSIZE + ind]] = swaparr[i]; + qb[pass * RSIZE + ind]++; + __builtin_prefetch(d + qb[pass * RSIZE + ind] + 1); + } + + u32* tptr = swaparr; + swaparr = d; + d = tptr; + v = !v; + } + + if (v) { + u32* tptr = swaparr; + swaparr = d; + d = tptr; + memcpy(swaparr, d, c->clausecnt * sizeof(u32)); + } + //free(d); + + for (uint i = 0; i < c->clausecnt; ++i) { + d[swaparr[i]] = i; + } + + for (uint i = 0; i < c->litcnt; ++i) { + c->clauses[i] = d[c->clauses[i]]; + } + + for (uint i = 0; i < c->clausecnt; ++i) { + swaparr[d[i]] = c->lastvars[i]; + } + + memcpy(c->lastvars, swaparr, sizeof(u32) * c->clausecnt); + free(swaparr); + free(d); +} \ No newline at end of file diff --git a/cnf.h b/cnf.h new file mode 100644 index 0000000..df8f7e6 --- /dev/null +++ b/cnf.h @@ -0,0 +1,31 @@ +#pragma once +#include "types.h" +#include +#include +#include + +#define CHECK(X, Y) if (X == NULL) { \ + printf(Y); \ + return NULL; \ +} + + +typedef struct { + u32 varcnt; + u32 litcnt; + u32 clausecnt; + u32* lastvars; + u32* variables; + u32* clauses; + u8* pars; +} cnf; + +cnf* readDIMACS(char* path); + +void printcnf(cnf* c); + + +void sortlastnum(cnf* c, u64 N); + + +void freecnf(cnf* c); \ No newline at end of file diff --git a/cpusolver.c b/cpusolver.c new file mode 100644 index 0000000..b77e88d --- /dev/null +++ b/cpusolver.c @@ -0,0 +1 @@ +#include "cpusolver.h" diff --git a/cpusolver.h b/cpusolver.h new file mode 100644 index 0000000..1067199 --- /dev/null +++ b/cpusolver.h @@ -0,0 +1,31 @@ +#pragma once +#include "types.h" + +typedef struct { + u32 varcnt; + u32 clausecnt; + u32* clauseinds; + u32* clausevals; + + u32* watchlist; +} cpusolver; + +void cpusolve() { + cpusolver s; + + +} +/* + * Read in DIMACs + * A clause is a list of variables and their assignments + * + * Create watchlists: + * 2 entries for each list - flag bit for if its a binary clause + * + * 2-watch: + * If one of them is marked false, remove and replace + * if there are no other literals in the clause that are true, unitprop the last literal + * if there are no literals that evaluate to true and the other watch is false, UNSAT + * if a literal is true, do nothing + * if 2 literals are unassigned, clause is unsat + */ \ No newline at end of file diff --git a/gpusolver.c b/gpusolver.c new file mode 100644 index 0000000..52e8906 --- /dev/null +++ b/gpusolver.c @@ -0,0 +1,403 @@ +#include "gpusolver.h" +#include +#include "time.h" + + +static const char kernel_source[4560] = "static inline void stateaddpow(uint wcnt, uint* state, uint pow) {\n" + " uint corpow = pow & 0b11111U;\n" + " uint startind = pow >> 5U;\n" + " uint tr = 1U << corpow;\n" + " uint tval = state[startind] + tr;\n" + " uchar choice = !((tval > state[startind]) && (tval >= tr));\n" + " state[startind] = tval;\n" + " for (uint i = 0; i < wcnt; ++i) {\n" + " uchar cond = (i > startind);\n" + " state[i] += choice * cond;\n" + " choice = choice & (state[i] == 0) * cond + (!cond) & choice;\n" + " }\n" + "}\n" + "\n" + "__global static uint setmax;\n" + "\n" + "__kernel void vectorSAT(__global const uint* cnfheader, __global const uint* lvars, __global const uint* vars, __global const uint* clauses, __global const uchar* pars, __global uint* output, __global uchar* scratchpad, __global uint* maxvals) {\n" + " output[0] = 2;\n" + "\n" + " uint cnt = cnfheader[0];\n" + " uint vcnt = cnfheader[1];\n" + " uint ccnt = cnfheader[2];\n" + "\n" + " uint wcnt = 1 + (vcnt >> 5U);\n" + "\n" + " // Zero out the counter\n" + " for (uint i = 0; i < wcnt; ++i) output[i + 1] = 0;\n" + "\n" + " uint maxctr = 1U << (vcnt & 0b11111U);\n" + "\n" + " uint glbid = get_global_id(0);\n" + " uint glbsz = get_global_size(0);\n" + "\n" + " /*\n" + " uint locid = get_local_id(0);\n" + " uint locsz = get_local_size(0);\n" + " uint grpid = get_group_id(0);\n" + " uint grpcn = get_num_groups(0);\n" + " */\n" + "\n" + " bool done = false;\n" + " uint iter = 0;\n" + " while (output[0] == 2) {\n" + " // if (glbid == 0) printf(\"%s\\n\", \":~\");\n" + " setmax = 0;\n" + " uint maxnumx = 0;\n" + "\n" + " // Set all scratchpad clauses to true\n" + " for (uint j = 0; j < ccnt; j += glbsz) {\n" + " //uchar cond = (j + glbid) < ccnt;\n" + " // If ptr would go past end of array, set it to last element\n" + " // j = j * cond + (!cond) * (ccnt - glbid - 1);\n" + " if ((j + glbid) < ccnt) scratchpad[j + glbid] = 1;\n" + " }\n" + "\n" + " barrier(CLK_GLOBAL_MEM_FENCE);\n" + "\n" + " for (uint j = 0; j < cnt; j += glbsz) {\n" + " // uchar cond = (j + glbid) < cnt;\n" + " // Last element cap\n" + " // j = j * cond + (!cond) * (cnt - glbid - 1);\n" + " if ((j + glbid) < cnt) {\n" + " uint varind = vars[j + glbid];\n" + " varind = (vcnt - 1) - varind;\n" + " uint iind = varind >> 5U;\n" + " uint bind = varind & 0b11111U;\n" + " uchar cpar = (output[iind + 1] >> bind) & 1U;\n" + " if (cpar != pars[j + glbid]) scratchpad[clauses[j + glbid]] = 0;\n" + " }\n" + " }\n" + "\n" + " barrier(CLK_GLOBAL_MEM_FENCE);\n" + "\n" + "\n" + " for (uint j = 0; j < ccnt; j += glbsz) {\n" + " if (((j + glbid) < ccnt) && (scratchpad[j + glbid] == 1)) {\n" + " setmax = 1;\n" + " // printf(\"%u\\n\", (~output[1]) & 0b11111);\n" + " // printf(\"%u%u%u%u%u\\n\", (output[1] >> 4) & 1U, (output[1] >> 3) & 1U, (output[1] >> 2) & 1U, (output[1] >> 1) & 1U, output[1] & 1U);\n" + " // printf(\"%u - %u\\n\", j + glbid, scratchpad[j + glbid]);\n" + " }\n" + "\n" + " // uchar cond = (j + glbid) < cnt;\n" + " // Last element cap\n" + " // j = j * cond + (!cond) * (cnt - glbid - 1);\n" + " // if (scratchpad[j + glbid] == 1) setmax = true;\n" + " }\n" + "\n" + " barrier(CLK_GLOBAL_MEM_FENCE);\n" + "\n" + " if (setmax) {\n" + " // Set maxval array to zero\n" + " maxvals[glbid] = 0;\n" + "\n" + " barrier(CLK_GLOBAL_MEM_FENCE);\n" + "\n" + " // Accumulate and reduce the maximums\n" + " for (uint j = 0; j < ccnt; j += glbsz) {\n" + " uint a = maxvals[glbid];\n" + " uint b = scratchpad[j + glbid] * lvars[j + glbid];\n" + " uint c = max(a, b);\n" + " if ((j + glbid) < ccnt) maxvals[glbid] = c;\n" + " }\n" + "\n" + " barrier(CLK_GLOBAL_MEM_FENCE);\n" + "\n" + " // Final reduction pass\n" + " uint maxj = maxvals[0];\n" + " for (uint j = 1; j < glbsz; ++j) {\n" + " maxj = max(maxj, maxvals[j]);\n" + " }\n" + "\n" + " // Add to the counter\n" + " if (glbid == 0) {\n" + " // printf(\"> %u\\n\", maxj);\n" + " stateaddpow(wcnt, output + 1, maxj);\n" + " // printf(\">> %u%u%u%u%u\\n\", (output[1] >> 4) & 1U, (output[1] >> 3) & 1U, (output[1] >> 2) & 1U, (output[1] >> 1) & 1U, output[1] & 1U);\n" + " }\n" + "\n" + " // Check counter for overflow\n" + " if (output[wcnt] >= maxctr) {\n" + " output[0] = 1;\n" + " return;\n" + " }\n" + " } else {\n" + " // SAT. Set status and assignment.\n" + " output[0] = 0;\n" + " if (glbid == 0) {\n" + " for (uint i = 0; i < wcnt; ++i) output[i + 1] = ~output[i + 1];\n" + " }\n" + " return;\n" + " }\n" + " iter++;\n" + " }\n" + "}"; + + +static const size_t kernel_len = 4559; + +#define GLOBAL_SIZE (256) +#define LOCAL_SIZE (GLOBAL_SIZE) + +#define CHECKASGN (true) + +#define DEBUG + +i32 gpusolve(cnf* c) { + cl_platform_id platformid = NULL; + cl_device_id deviceid = NULL; + cl_uint numdevices; + cl_uint numplatforms; + + FILE *fp; + char *source_str; + size_t source_size; + + fp = fopen("../psat.cl", "r"); + if (!fp) { + fprintf(stderr, "Failed to load kernel.\n"); + exit(1); + } + source_str = (char*)malloc(0x100000); + source_size = fread( source_str, 1, 0x100000, fp); + fclose( fp ); + + u32 wordcnt = 1 + ((c->varcnt) >> 5U); + + u32* solution = calloc((wordcnt + 1), sizeof(u32)); + if (solution == NULL) { + printf("Failed to allocate solution buffer\n"); + exit(1); + } + + cl_int res = clGetPlatformIDs(1, &platformid, &numplatforms); + if (res != CL_SUCCESS) { + printf("Failed to retrieve OpenCL platform IDs\n"); + exit(1); + } + printf("Found %u platforms\n", numplatforms); + res = clGetDeviceIDs(platformid, CL_DEVICE_TYPE_GPU, 1, &deviceid, &numdevices); + if (res != CL_SUCCESS) { + printf("Failed to retrieve OpenCL device IDs\n"); + exit(1); + } + printf("Found %u devices\n", numdevices); + + cl_context context = clCreateContext(NULL, 1, &deviceid, NULL, NULL, &res); + if (res != CL_SUCCESS) { + printf("Failed to create OpenCL context\n"); + exit(1); + } + + cl_command_queue commqueue = clCreateCommandQueueWithProperties(context, deviceid, 0, &res); + if (res != CL_SUCCESS) { + printf("Failed to create OpenCL command queue\n"); + exit(1); + } + + // Device memory buffers: + /* For the CNF: + * {clausecnt, literalcnt, varcnt) + * variable array + * clause array + * parity array + * + * Other: + * Status + * A single counter + */ + + // TODO: Look into DMA, maybe? Could do clause learning CPU-side and just update the GPU buffer + cl_mem gpuheader = clCreateBuffer(context, CL_MEM_READ_ONLY, 3 * sizeof(cl_uint), NULL, &res); + if (res != CL_SUCCESS) { + printf("Failed to create CNF header buffer\n"); + exit(1); + } + cl_mem gpulvars = clCreateBuffer(context, CL_MEM_READ_ONLY, c->clausecnt * sizeof(cl_uint), NULL, &res); + if (res != CL_SUCCESS) { + printf("Failed to create CNF lvar buffer\n"); + exit(1); + } + cl_mem gpuvariables = clCreateBuffer(context, CL_MEM_READ_ONLY, c->litcnt * sizeof(cl_uint), NULL, &res); + if (res != CL_SUCCESS) { + printf("Failed to create CNF variable buffer\n"); + exit(1); + } + cl_mem gpuclauses = clCreateBuffer(context, CL_MEM_READ_ONLY, c->litcnt * sizeof(cl_uint), NULL, &res); + if (res != CL_SUCCESS) { + printf("Failed to create CNF clause buffer\n"); + exit(1); + } + cl_mem gpuparities = clCreateBuffer(context, CL_MEM_READ_ONLY, c->litcnt * sizeof(cl_uchar), NULL, &res); + if (res != CL_SUCCESS) { + printf("Failed to create CNF parity buffer\n"); + exit(1); + } + + // Allocate scratchpad memory + cl_mem gpuscratchpad = clCreateBuffer(context, CL_MEM_READ_WRITE, c->clausecnt * sizeof(cl_uchar), NULL, &res); + if (res != CL_SUCCESS) { + printf("Failed to create CNF subsumption scratchpad buffer\n"); + exit(1); + } + /* + cl_mem gpumaxvals = clCreateBuffer(context, CL_MEM_READ_WRITE, GLOBAL_SIZE * sizeof(cl_uint), NULL, &res); + if (res != CL_SUCCESS) { + printf("Failed to create CNF maxval buffer\n"); + exit(1); + } + */ + + cl_mem gpuoutput = clCreateBuffer(context, CL_MEM_READ_WRITE, (wordcnt + 1) * sizeof(cl_uint), NULL, &res); + if (res != CL_SUCCESS) { + printf("Failed to create output buffer\n"); + exit(1); + } + + u32 cnfheader[3] = { c->litcnt, c->varcnt, c->clausecnt }; + + // Load buffers to GPU + res = clEnqueueWriteBuffer(commqueue, gpuheader, CL_TRUE, 0, 3 * sizeof(cl_uint), cnfheader, 0, NULL, NULL); + if (res != CL_SUCCESS) { + printf("Failed to queue CNF header write\n"); + exit(1); + } + res = clEnqueueWriteBuffer(commqueue, gpulvars, CL_TRUE, 0, c->clausecnt * sizeof(cl_uint), c->lastvars, 0, NULL, NULL); + if (res != CL_SUCCESS) { + printf("Failed to queue CNF lvar write\n"); + exit(1); + } + res = clEnqueueWriteBuffer(commqueue, gpuvariables, CL_TRUE, 0, c->litcnt * sizeof(cl_uint), c->variables, 0, NULL, NULL); + if (res != CL_SUCCESS) { + printf("Failed to queue CNF variable write\n"); + exit(1); + } + res = clEnqueueWriteBuffer(commqueue, gpuclauses, CL_TRUE, 0, c->litcnt * sizeof(cl_uint), c->clauses, 0, NULL, NULL); + if (res != CL_SUCCESS) { + printf("Failed to queue CNF clause write\n"); + exit(1); + } + res = clEnqueueWriteBuffer(commqueue, gpuparities, CL_TRUE, 0, c->litcnt * sizeof(cl_uchar), c->pars, 0, NULL, NULL); + if (res != CL_SUCCESS) { + printf("Failed to queue CNF parity write\n"); + exit(1); + } + + const char* kernelptr = kernel_source; + + cl_program satprog = clCreateProgramWithSource(context, 1, (const char**) &source_str, (const size_t*) &source_size, &res); + if (res != CL_SUCCESS) { + printf("Failed to create OpenCL program\n"); + exit(1); + } + + res = clBuildProgram(satprog, 1, &deviceid, NULL, NULL, NULL); + if (res != CL_SUCCESS) { + char* logbuf = malloc(sizeof(char) * 65536); + size_t loglen = 0; + res = clGetProgramBuildInfo(satprog, deviceid, CL_PROGRAM_BUILD_LOG, sizeof(char) * 65536, logbuf, &loglen); + if (res != CL_SUCCESS) { + printf("Failed to retrieve build logs\n"); + exit(1); + } + printf("Build failed\n"); + printf("%s\n", logbuf); + free(logbuf); + exit(1); + } + + cl_kernel kernel = clCreateKernel(satprog, "vectorSAT", &res); + if (res != CL_SUCCESS) { + printf("Failed to create kernel\n"); + exit(1); + } + + res = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void*) &gpuheader); + res = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void*) &gpulvars); + res = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void*) &gpuvariables); + res = clSetKernelArg(kernel, 3, sizeof(cl_mem), (void*) &gpuclauses); + res = clSetKernelArg(kernel, 4, sizeof(cl_mem), (void*) &gpuparities); + + res = clSetKernelArg(kernel, 5, sizeof(cl_mem), (void*) &gpuoutput); + + res = clSetKernelArg(kernel, 6, sizeof(cl_mem), (void*) &gpuscratchpad); + res = clSetKernelArg(kernel, 7, LOCAL_SIZE * sizeof(cl_uint), NULL); + + + u64 starttime = utime(); + size_t itemsize[2] = {GLOBAL_SIZE, LOCAL_SIZE }; + res = clEnqueueNDRangeKernel(commqueue, kernel, 1, NULL, itemsize, itemsize + 1, 0, NULL, NULL); + if (res != CL_SUCCESS) { + printf("Failed to queue kernel for execution\n"); + exit(res); + } + + + + res = clEnqueueReadBuffer(commqueue, gpuoutput, CL_TRUE, 0, (wordcnt + 1) * sizeof(cl_uint), solution, 0, NULL, NULL); + if (res != CL_SUCCESS) { + printf("Failed to read kernel output\n"); + exit(1); + } + u64 endtime = utime(); + + if (solution[0] == 1) { + printf("UNSAT\n"); + } else if (solution[0] == 0) { + printf("SAT\n"); + for (u32 k = 0; k < c->varcnt; ++k) { + u32 vind = (c->varcnt - 1) - k; + u32 iind = vind >> 5U; + u32 bind = vind & 0b11111U; + u8 par = (solution[iind + 1] >> bind) & 1U; + printf("%u", par); + } + printf("\n"); + if (CHECKASGN) { + u8* assigncheck = calloc(c->clausecnt, sizeof(u8)); + for (u32 i = 0; i < c->litcnt; ++i) { + u32 g = ((c->varcnt - 1) - c->variables[i]) >> 5U; + u32 h = ((c->varcnt - 1) - c->variables[i]) & 0b11111U; + u8 paract = (solution[g + 1] >> h) & 1U; + if (c->pars[i] == paract) assigncheck[c->clauses[i]] = true; + } + for (u32 i = 0; i < c->clausecnt; ++i) { + if (!assigncheck[i]) { + printf("Failed assignment check\n"); + exit(1); + } + } + free(assigncheck); + printf("Passed assignment check\n"); + } + } else { + printf("What the fuck???\n"); + exit(1); + } + printf("Actual time: %f seconds\n", ((f64) (endtime - starttime)) / 1000000.0); + + res = clFlush(commqueue); + res = clFinish(commqueue); + res = clReleaseKernel(kernel); + res = clReleaseProgram(satprog); + res = clReleaseMemObject(gpuheader); + res = clReleaseMemObject(gpulvars); + res = clReleaseMemObject(gpuvariables); + res = clReleaseMemObject(gpuclauses); + res = clReleaseMemObject(gpuparities); + res = clReleaseMemObject(gpuoutput); + res = clReleaseMemObject(gpuscratchpad); + res = clReleaseCommandQueue(commqueue); + res = clReleaseContext(context); + i32 retval = solution[0]; + free(solution); + free(source_str); + + return retval; +} \ No newline at end of file diff --git a/gpusolver.h b/gpusolver.h new file mode 100644 index 0000000..65e3728 --- /dev/null +++ b/gpusolver.h @@ -0,0 +1,4 @@ +#pragma once +#include "cnf.h" + +i32 gpusolve(cnf* c); \ No newline at end of file diff --git a/main.c b/main.c new file mode 100644 index 0000000..8f4372f --- /dev/null +++ b/main.c @@ -0,0 +1,47 @@ +#include +#include "cnf.h" +#include "gpusolver.h" +#include "time.h" + +int main() { + /* + srand( utime()); + u32 cnt = 4096; + printf("p cnf %u %u\n", cnt, cnt); + for (u32 i = 0; i < cnt; ++i) { + printf("%s%u 0\n", (rand() & 1U) ? ("") : ("-"), i + 1); + if ((i & 0xFF) == 0) { + srand(utime() ^ rand()); + } + } + return 0; + */ + /* + for (uint i = 0; i < 1000; ++i) { + char buf[128]; + i32 len = sprintf(buf, "/home/lev/Downloads/uf50/uf50-0%u.cnf", i + 1); + + + + } + */ + cnf* c = readDIMACS("/home/lev/Downloads/uf50/uf50-088.cnf"); + // printf("%u\n", c->litcnt); + + // for (u32 i = 0; i < c->clausecnt; ++i) printf("%u ", c->lastvars[i]); + //printf("\ntomato\n"); + sortlastnum(c, c->litcnt); + // printf("%u\n", c->lastvars[c->clauses[0]]); + + //printf("%u %u %u\n\n", c->litcnt, c->varcnt, c->clausecnt); + u64 start = utime(); + i32 res = gpusolve(c); + u64 stop = utime(); + // printf("%s : %d\n", buf, res); + printf("Took %f seconds\n", ((f64) (stop - start)) / 1000000.0); + // printf("please don't\n"); + // if (res == 1) break; + freecnf(c); + + return 0; +} diff --git a/psat.cl b/psat.cl new file mode 100644 index 0000000..c29dcff --- /dev/null +++ b/psat.cl @@ -0,0 +1,151 @@ + +#pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable + +#define DEBUG + +#ifdef DEBUG +#define DBG(X) printf("%s\n", (X)) +#else +#define DBG(X) do {} while (0) +#endif + +static inline void printbits(uint a) { + for (uint i = 0; i < 32; ++i) { + uint ind = 31 - i; + printf("%u", (a >> ind) & 1U); + } +} + +static inline void stateaddpow(uint wcnt, uint* state, uint pow) { + uint corpow = pow & 0b11111U; + uint startind = pow >> 5U; + uint tr = 1U << corpow; + uint tval = state[startind] + tr; + bool carry = tval < state[startind]; + state[startind] = tval; + if (carry) { + uint i = startind + 1; + for ( ; i < wcnt; ++i) { + state[i]++; + if (state[i]) break; + } + } +} + +__kernel void vectorSAT(__global const uint* cnfheader, __global const uint* lvars, __global const uint* vars, __global const uint* clauses, __global const uchar* pars, __global uint* output, __global uchar* scratchpad, __local uint* maxvals) { + output[0] = 2; + + __local uint setmax; + + uint cnt = cnfheader[0]; + uint vcnt = cnfheader[1]; + uint ccnt = cnfheader[2]; + + uint wcnt = 1 + (vcnt >> 5U); + + uint maxctr = 1U << (vcnt & 0b11111U); + + //uint glbid = get_global_id(0); + //uint glbsz = get_global_size(0); + + + uint locid = get_local_id(0); + uint locsz = get_local_size(0); + // uint grpid = get_group_id(0); + // uint grpcn = get_num_groups(0); + + // Zero out the counter + for (uint i = 0; i < wcnt; ++i) output[i + 1] = 0; + + // Set all scratchpad clauses to true + for (uint j = 0; j < ccnt; j += locsz) { + uchar cond = (j + locid) < ccnt; + j = j * cond + (!cond) * (ccnt - locid - 1); + scratchpad[j + locid] = 1; + } + + __local uint firstind[1]; + while (output[0] == 2) { + firstind[0] = ccnt; + + setmax = 0; + uint maxnumx = 0; + + for (uint j = 0; j < cnt; j += locsz) { + + uchar cond = (j + locid) < cnt; + // Last element cap + j = j * cond + (!cond) * (cnt - locid - 1); + uint varind = vars[j + locid]; + varind = (vcnt - 1) - varind; + uint iind = varind >> 5U; + uint bind = varind & 0b11111U; + uchar cpar = (output[iind + 1] >> bind) & 1U; + + if (cpar != pars[j + locid]) { + scratchpad[clauses[j + locid]] = 0; + } + } + + barrier(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE); + + + for (uint j = 0; j < ccnt; j += locsz) { + if (scratchpad[j + locid] == 1 && (j + locid) < ccnt) { + setmax = 1; + } + } + + barrier(CLK_LOCAL_MEM_FENCE); + + if (setmax) { + // Set maxval array to zero + maxvals[locid] = 0; + + // Accumulate and reduce the maximums + for (uint j = 0; j < ccnt; j += locsz) { + //uint a = maxvals[locid]; + //uint b = lvars[j + locid]; + // uint c = max(a, b); + if ((j + locid) < ccnt && scratchpad[j + locid] == 1) { + //maxvals[locid] = c; + atomic_min(firstind, (j + locid)); + } + } + + barrier(CLK_LOCAL_MEM_FENCE); + + uint maxj = lvars[firstind[0]]; + + // Set all scratchpad clauses to true + for (uint j = 0; j < ccnt; j += locsz) { + uchar cond = (j + locid) < ccnt; + j = j * cond + (!cond) * (ccnt - locid - 1); + scratchpad[j + locid] = 1; + } + + // Final reduction pass + /* + uint maxj = maxvals[0]; + for (uint j = 1; j < locsz; ++j) { + maxj = max(maxj, maxvals[j]); + } + */ + + // Add to the counter + if (locid == 0) { + stateaddpow(wcnt, output + 1, maxj); + } + + if (output[wcnt] >= maxctr) { + output[0] = 1; + } + } else { + output[0] = 0; + if (locid == 0) { + for (uint i = 0; i < wcnt; ++i) output[i + 1] = ~output[i + 1]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + } +} \ No newline at end of file diff --git a/time.c b/time.c new file mode 100644 index 0000000..f380c18 --- /dev/null +++ b/time.c @@ -0,0 +1,35 @@ +#include "time.h" +#include +#include +#include + +struct timeval tv; + +u64 utime() { + gettimeofday(&tv, 0); + return tv.tv_sec * 1000000LU + tv.tv_usec; +} + +u64 mtime() { + gettimeofday(&tv, 0); + return tv.tv_sec * 1000LU + (tv.tv_usec / 1000); +} + +i32 msleep(u64 msec) { + struct timespec ts; + i32 res; + + if (msec < 0) { + errno = EINVAL; + return -1; + } + + ts.tv_sec = msec / 1000; + ts.tv_nsec = (msec % 1000) * 1000000; + + do { + res = nanosleep(&ts, &ts); + } while (res && errno == EINTR); + + return res; +} \ No newline at end of file diff --git a/time.h b/time.h new file mode 100644 index 0000000..64de2ba --- /dev/null +++ b/time.h @@ -0,0 +1,6 @@ +#pragma once +#include "types.h" + +u64 utime(); +u64 mtime(); +i32 msleep(u64 msec); \ No newline at end of file diff --git a/types.h b/types.h new file mode 100644 index 0000000..4adcbaf --- /dev/null +++ b/types.h @@ -0,0 +1,16 @@ +#pragma once +#include +#include + +typedef uint8_t u8; +typedef uint16_t u16; +typedef uint32_t u32; +typedef uint64_t u64; + +typedef int8_t i8; +typedef int16_t i16; +typedef int32_t i32; +typedef int64_t i64; + +typedef float f32; +typedef double f64;