diff --git a/CMakeLists.txt b/CMakeLists.txt index 1f71281..de2aef4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,8 +2,8 @@ cmake_minimum_required(VERSION 3.22) project(psat C) set(CMAKE_C_STANDARD 99) -set(CMAKE_C_FLAGS "-O3") +# set(CMAKE_C_FLAGS "-O3") -add_executable(psat main.c cnf.c cnf.h time.c time.h types.h gpusolver.c gpusolver.h cpusolver.c cpusolver.h) +add_executable(psat main.c cnf.c cnf.h time.c time.h types.h gpusolver.c gpusolver.h cpusolver.c cpusolver.h tests/masterTest.c tests/masterTest.h) target_link_libraries(psat -lOpenCL) \ No newline at end of file diff --git a/cnf.c b/cnf.c index cdd7059..b5c6710 100644 --- a/cnf.c +++ b/cnf.c @@ -13,15 +13,19 @@ cnf* readDIMACS(char* path) { CHECK(fgets(buf, sizeof(char) * bufsize, f), "Failed to read file\n"); + // 'c' marks the beginning of a comment line in DIMACs. Skip this line while (buf[0] == 'c') { CHECK(fgets(buf, sizeof(char) * bufsize, f), "Failed to read file\n"); } + // The first non-comment line is the header, should have format 'p cnf [varcnt] [clausecnt] + // We skip the first bit by iterating and skipping any character that's p, c, n, f, or space char* temp = buf; while (*temp == ' ' || *temp == 'p' || *temp == 'c' || *temp == 'n' || *temp == 'f') { temp++; } + // Char by char we read in the number of variables c->varcnt = 0; while (((u8) (*temp - '0')) < 10) { c->varcnt *= 10; @@ -29,8 +33,10 @@ cnf* readDIMACS(char* path) { temp++; } + // Skip any trailing whitespace while (*temp == ' ') temp++; + // Read in clausecnt c->clausecnt = 0; while (((u8) (*temp - '0')) < 10) { c->clausecnt *= 10; @@ -38,6 +44,9 @@ cnf* readDIMACS(char* path) { temp++; } + // Resize line buffer: the maximum clause size is varcnt, the maximum size of a u32 in decimal + // is 10, and conservatively given 2 extra characters for the - symbol and space delimiting, + // resizing the buffer to this should mean we never encounter a clause we fail to read. u32 nsize = sizeof(char) * (c->varcnt * 12LU) + 10; char* nbuf = realloc(buf, nsize); CHECK(nbuf, "Failed to realloc read buffer\n"); @@ -47,6 +56,9 @@ cnf* readDIMACS(char* path) { u32 cnt = 0; u32 cap = 32; + // We store separate arrays: literal i of the CNF has a parity at index i in one array + // the variable it corresponds to at the same index in another + // And the clause it's a member of in a third c->variables = calloc( cap, sizeof(u32)); CHECK(c->variables, "Failed to allocate literal variables\n"); c->clauses = calloc( cap, sizeof(u32)); @@ -60,15 +72,15 @@ cnf* readDIMACS(char* path) { for (u32 i = 0; i < c->clausecnt; ++i) { CHECK(fgets(buf, nsize, f), "Failed to read clause\n"); - - temp = buf; while (*temp == ' ') temp++; + // Iterate through char by char bool empty = true; bool tr = true; while (*temp != '\n') { if (cnt == cap) { + // Out of space to add more literals, realloc arrays u32 ncap = cap << 1U; c->variables = realloc(c->variables, sizeof(u32) * ncap); memset(c->variables + cap, 0, sizeof(u32) * cap); @@ -83,11 +95,15 @@ cnf* readDIMACS(char* path) { } if (*temp == '-') { + // Mark that the literal currently being read is negated tr = false; } else if (((u8) (*temp - '0')) < 10) { + // Read in the literal's digits c->variables[cnt] *= 10; c->variables[cnt] += (*temp - '0'); } else { + // Skip any whitespace and pack the read value into the arrays, along with + // any additional data while (temp[1] == ' ') temp++; c->pars[cnt] = tr; c->clauses[cnt] = i; @@ -109,6 +125,8 @@ cnf* readDIMACS(char* path) { } } + // Lastvars is set to the index of the last variable of each clause. However, because in the + // counter, the last variable has the lsb position, we flip the value of lastvars for (u32 i = 0; i < c->clausecnt; ++i) { c->lastvars[i] += 1U; c->lastvars[i] = c->varcnt - c->lastvars[i]; @@ -116,6 +134,7 @@ cnf* readDIMACS(char* path) { c->litcnt = cnt; + // Realloc the arrays to exactly match the number of literals c->variables = realloc(c->variables, sizeof(u32) * c->litcnt); c->clauses = realloc(c->clauses, sizeof(u32) * c->litcnt); c->pars = realloc(c->pars, sizeof(u8) * c->litcnt); @@ -152,10 +171,8 @@ const u64 RSIZE = 1LU << RBITS; const u64 RLVLS = (63LU / RBITS) + 1; const u64 RMASK = RSIZE - 1; -// Currently sorting array of program pointers by the score of the programs -// I need to sort by lastvar, within that sort by clause - void sortlastnum(cnf* c, u64 N) { + // Radix sort on the literals themselves based on their clause's bool v = false; u32* d = malloc(sizeof(u32) * c->litcnt); u32* d2 = malloc(sizeof(u32) * c->litcnt); @@ -170,7 +187,8 @@ void sortlastnum(cnf* c, u64 N) { u64 shift = pass * RBITS; for (u64 i = 0; i < N; ++i) { - u32 val = UINT32_MAX - c->lastvars[(c->clausecnt - 1) - c->clauses[i]]; + // U32MAX - because we want to sort highest to lowest, not lowest to highest + u32 val = UINT32_MAX - c->lastvars[c->clauses[i]]; u64 ind = (val >> shift) & RMASK; qe[pass * RSIZE + ind]++; } @@ -185,7 +203,7 @@ void sortlastnum(cnf* c, u64 N) { } for (u64 i = 0; i < N; ++i) { - u32 val = UINT32_MAX - c->lastvars[(c->clausecnt - 1) - c->clauses[i]]; + u32 val = UINT32_MAX - c->lastvars[c->clauses[i]]; u64 ind = (val >> shift) & RMASK; d[qb[pass * RSIZE + ind]] = c->variables[i]; d2[qb[pass * RSIZE + ind]] = c->clauses[i]; @@ -196,6 +214,7 @@ void sortlastnum(cnf* c, u64 N) { __builtin_prefetch(d3 + qb[pass * RSIZE + ind] + 1); } + // Every iteration we swap pointers to avoid doing a memcpy u32* tptr = c->variables; c->variables = d; d = tptr; @@ -208,6 +227,8 @@ void sortlastnum(cnf* c, u64 N) { v = !v; } + // If the pointers are still swapped at the end, we swap them to their original locations and + // copy over the result if (v) { u32* tptr = c->variables; c->variables = d; @@ -226,11 +247,13 @@ void sortlastnum(cnf* c, u64 N) { free(d2); free(d3); + // Initialize an array with all the clause indices in their current order u32* swaparr = malloc(sizeof(u32) * c->clausecnt); for (u32 i = 0; i < c->clausecnt; ++i) { swaparr[i] = i; } + // Perform another radix sort! v = false; d = malloc(sizeof(u32) * c->clausecnt); @@ -241,6 +264,8 @@ void sortlastnum(cnf* c, u64 N) { u64 shift = pass * RBITS; for (u64 i = 0; i < c->clausecnt; ++i) { + // Sort highest to lowest by lastvar again, but this time sorting the indices in swaparr + // instead of sorting literals u32 val = UINT32_MAX - c->lastvars[swaparr[i]]; u64 ind = (val >> shift) & RMASK; qe[pass * RSIZE + ind]++; @@ -277,18 +302,25 @@ void sortlastnum(cnf* c, u64 N) { } //free(d); + // Swaparr now has each clause index value at the correct index for where + // it appears in the CNF now that it has been sorted + // We'll now invert the permutation in swaparr for (uint i = 0; i < c->clausecnt; ++i) { d[swaparr[i]] = i; } + // giving us a lookup table from oldindex to newindex + // Which we then correct in all the literals for (uint i = 0; i < c->litcnt; ++i) { c->clauses[i] = d[c->clauses[i]]; } + // and then finally reorder the lastvars for (uint i = 0; i < c->clausecnt; ++i) { swaparr[d[i]] = c->lastvars[i]; } + // Copy the lastvars back into the correct array, clean up, and we're done! memcpy(c->lastvars, swaparr, sizeof(u32) * c->clausecnt); free(swaparr); free(d); diff --git a/gpusolver.c b/gpusolver.c index 52e8906..63aa37b 100644 --- a/gpusolver.c +++ b/gpusolver.c @@ -2,146 +2,6 @@ #include #include "time.h" - -static const char kernel_source[4560] = "static inline void stateaddpow(uint wcnt, uint* state, uint pow) {\n" - " uint corpow = pow & 0b11111U;\n" - " uint startind = pow >> 5U;\n" - " uint tr = 1U << corpow;\n" - " uint tval = state[startind] + tr;\n" - " uchar choice = !((tval > state[startind]) && (tval >= tr));\n" - " state[startind] = tval;\n" - " for (uint i = 0; i < wcnt; ++i) {\n" - " uchar cond = (i > startind);\n" - " state[i] += choice * cond;\n" - " choice = choice & (state[i] == 0) * cond + (!cond) & choice;\n" - " }\n" - "}\n" - "\n" - "__global static uint setmax;\n" - "\n" - "__kernel void vectorSAT(__global const uint* cnfheader, __global const uint* lvars, __global const uint* vars, __global const uint* clauses, __global const uchar* pars, __global uint* output, __global uchar* scratchpad, __global uint* maxvals) {\n" - " output[0] = 2;\n" - "\n" - " uint cnt = cnfheader[0];\n" - " uint vcnt = cnfheader[1];\n" - " uint ccnt = cnfheader[2];\n" - "\n" - " uint wcnt = 1 + (vcnt >> 5U);\n" - "\n" - " // Zero out the counter\n" - " for (uint i = 0; i < wcnt; ++i) output[i + 1] = 0;\n" - "\n" - " uint maxctr = 1U << (vcnt & 0b11111U);\n" - "\n" - " uint glbid = get_global_id(0);\n" - " uint glbsz = get_global_size(0);\n" - "\n" - " /*\n" - " uint locid = get_local_id(0);\n" - " uint locsz = get_local_size(0);\n" - " uint grpid = get_group_id(0);\n" - " uint grpcn = get_num_groups(0);\n" - " */\n" - "\n" - " bool done = false;\n" - " uint iter = 0;\n" - " while (output[0] == 2) {\n" - " // if (glbid == 0) printf(\"%s\\n\", \":~\");\n" - " setmax = 0;\n" - " uint maxnumx = 0;\n" - "\n" - " // Set all scratchpad clauses to true\n" - " for (uint j = 0; j < ccnt; j += glbsz) {\n" - " //uchar cond = (j + glbid) < ccnt;\n" - " // If ptr would go past end of array, set it to last element\n" - " // j = j * cond + (!cond) * (ccnt - glbid - 1);\n" - " if ((j + glbid) < ccnt) scratchpad[j + glbid] = 1;\n" - " }\n" - "\n" - " barrier(CLK_GLOBAL_MEM_FENCE);\n" - "\n" - " for (uint j = 0; j < cnt; j += glbsz) {\n" - " // uchar cond = (j + glbid) < cnt;\n" - " // Last element cap\n" - " // j = j * cond + (!cond) * (cnt - glbid - 1);\n" - " if ((j + glbid) < cnt) {\n" - " uint varind = vars[j + glbid];\n" - " varind = (vcnt - 1) - varind;\n" - " uint iind = varind >> 5U;\n" - " uint bind = varind & 0b11111U;\n" - " uchar cpar = (output[iind + 1] >> bind) & 1U;\n" - " if (cpar != pars[j + glbid]) scratchpad[clauses[j + glbid]] = 0;\n" - " }\n" - " }\n" - "\n" - " barrier(CLK_GLOBAL_MEM_FENCE);\n" - "\n" - "\n" - " for (uint j = 0; j < ccnt; j += glbsz) {\n" - " if (((j + glbid) < ccnt) && (scratchpad[j + glbid] == 1)) {\n" - " setmax = 1;\n" - " // printf(\"%u\\n\", (~output[1]) & 0b11111);\n" - " // printf(\"%u%u%u%u%u\\n\", (output[1] >> 4) & 1U, (output[1] >> 3) & 1U, (output[1] >> 2) & 1U, (output[1] >> 1) & 1U, output[1] & 1U);\n" - " // printf(\"%u - %u\\n\", j + glbid, scratchpad[j + glbid]);\n" - " }\n" - "\n" - " // uchar cond = (j + glbid) < cnt;\n" - " // Last element cap\n" - " // j = j * cond + (!cond) * (cnt - glbid - 1);\n" - " // if (scratchpad[j + glbid] == 1) setmax = true;\n" - " }\n" - "\n" - " barrier(CLK_GLOBAL_MEM_FENCE);\n" - "\n" - " if (setmax) {\n" - " // Set maxval array to zero\n" - " maxvals[glbid] = 0;\n" - "\n" - " barrier(CLK_GLOBAL_MEM_FENCE);\n" - "\n" - " // Accumulate and reduce the maximums\n" - " for (uint j = 0; j < ccnt; j += glbsz) {\n" - " uint a = maxvals[glbid];\n" - " uint b = scratchpad[j + glbid] * lvars[j + glbid];\n" - " uint c = max(a, b);\n" - " if ((j + glbid) < ccnt) maxvals[glbid] = c;\n" - " }\n" - "\n" - " barrier(CLK_GLOBAL_MEM_FENCE);\n" - "\n" - " // Final reduction pass\n" - " uint maxj = maxvals[0];\n" - " for (uint j = 1; j < glbsz; ++j) {\n" - " maxj = max(maxj, maxvals[j]);\n" - " }\n" - "\n" - " // Add to the counter\n" - " if (glbid == 0) {\n" - " // printf(\"> %u\\n\", maxj);\n" - " stateaddpow(wcnt, output + 1, maxj);\n" - " // printf(\">> %u%u%u%u%u\\n\", (output[1] >> 4) & 1U, (output[1] >> 3) & 1U, (output[1] >> 2) & 1U, (output[1] >> 1) & 1U, output[1] & 1U);\n" - " }\n" - "\n" - " // Check counter for overflow\n" - " if (output[wcnt] >= maxctr) {\n" - " output[0] = 1;\n" - " return;\n" - " }\n" - " } else {\n" - " // SAT. Set status and assignment.\n" - " output[0] = 0;\n" - " if (glbid == 0) {\n" - " for (uint i = 0; i < wcnt; ++i) output[i + 1] = ~output[i + 1];\n" - " }\n" - " return;\n" - " }\n" - " iter++;\n" - " }\n" - "}"; - - -static const size_t kernel_len = 4559; - #define GLOBAL_SIZE (256) #define LOCAL_SIZE (GLOBAL_SIZE) @@ -181,13 +41,13 @@ i32 gpusolve(cnf* c) { printf("Failed to retrieve OpenCL platform IDs\n"); exit(1); } - printf("Found %u platforms\n", numplatforms); + // printf("Found %u platforms\n", numplatforms); res = clGetDeviceIDs(platformid, CL_DEVICE_TYPE_GPU, 1, &deviceid, &numdevices); if (res != CL_SUCCESS) { printf("Failed to retrieve OpenCL device IDs\n"); exit(1); } - printf("Found %u devices\n", numdevices); + // printf("Found %u devices\n", numdevices); cl_context context = clCreateContext(NULL, 1, &deviceid, NULL, NULL, &res); if (res != CL_SUCCESS) { @@ -289,8 +149,6 @@ i32 gpusolve(cnf* c) { exit(1); } - const char* kernelptr = kernel_source; - cl_program satprog = clCreateProgramWithSource(context, 1, (const char**) &source_str, (const size_t*) &source_size, &res); if (res != CL_SUCCESS) { printf("Failed to create OpenCL program\n"); @@ -329,8 +187,7 @@ i32 gpusolve(cnf* c) { res = clSetKernelArg(kernel, 6, sizeof(cl_mem), (void*) &gpuscratchpad); res = clSetKernelArg(kernel, 7, LOCAL_SIZE * sizeof(cl_uint), NULL); - - u64 starttime = utime(); + // u64 starttime = utime(); size_t itemsize[2] = {GLOBAL_SIZE, LOCAL_SIZE }; res = clEnqueueNDRangeKernel(commqueue, kernel, 1, NULL, itemsize, itemsize + 1, 0, NULL, NULL); if (res != CL_SUCCESS) { @@ -338,14 +195,12 @@ i32 gpusolve(cnf* c) { exit(res); } - - res = clEnqueueReadBuffer(commqueue, gpuoutput, CL_TRUE, 0, (wordcnt + 1) * sizeof(cl_uint), solution, 0, NULL, NULL); if (res != CL_SUCCESS) { printf("Failed to read kernel output\n"); exit(1); } - u64 endtime = utime(); + // u64 endtime = utime(); if (solution[0] == 1) { printf("UNSAT\n"); @@ -370,7 +225,7 @@ i32 gpusolve(cnf* c) { for (u32 i = 0; i < c->clausecnt; ++i) { if (!assigncheck[i]) { printf("Failed assignment check\n"); - exit(1); + solution[0] = 4; } } free(assigncheck); @@ -378,14 +233,18 @@ i32 gpusolve(cnf* c) { } } else { printf("What the fuck???\n"); - exit(1); + solution[0] = 3; } - printf("Actual time: %f seconds\n", ((f64) (endtime - starttime)) / 1000000.0); + // printf("Actual time: %f seconds\n", ((f64) (endtime - starttime)) / 1000000.0); res = clFlush(commqueue); res = clFinish(commqueue); res = clReleaseKernel(kernel); res = clReleaseProgram(satprog); + + res = clReleaseCommandQueue(commqueue); + res = clReleaseContext(context); + res = clReleaseMemObject(gpuheader); res = clReleaseMemObject(gpulvars); res = clReleaseMemObject(gpuvariables); @@ -393,8 +252,9 @@ i32 gpusolve(cnf* c) { res = clReleaseMemObject(gpuparities); res = clReleaseMemObject(gpuoutput); res = clReleaseMemObject(gpuscratchpad); - res = clReleaseCommandQueue(commqueue); - res = clReleaseContext(context); + + res = clReleaseDevice(deviceid); + i32 retval = solution[0]; free(solution); free(source_str); diff --git a/main.c b/main.c index 8f4372f..9b3e030 100644 --- a/main.c +++ b/main.c @@ -2,6 +2,7 @@ #include "cnf.h" #include "gpusolver.h" #include "time.h" +#include "tests/masterTest.h" int main() { /* @@ -25,6 +26,8 @@ int main() { } */ + + /* Expects a path to a DIMACS file */ cnf* c = readDIMACS("/home/lev/Downloads/uf50/uf50-088.cnf"); // printf("%u\n", c->litcnt); diff --git a/tests/masterTest.c b/tests/masterTest.c new file mode 100644 index 0000000..6a7af0f --- /dev/null +++ b/tests/masterTest.c @@ -0,0 +1,87 @@ +#include "masterTest.h" +#include +#include "../cnf.h" +#include "../gpusolver.h" +#include "../time.h" + +i32 runTests() { + i32 res = runuf20(); + if (res != 0) return res; + res = runuf50(); + if (res != 0) return res; + res = runuuf50(); + return res; +} + + +i32 runuf20() { + printf("Running against uf20\n"); + u32 passed = 0; + u64 tottime = 0; + for (u32 i = 0; i < 1000; ++i) { + char buf[128]; + i32 len = sprintf(buf, "/home/lev/Downloads/uf20/uf20-0%u.cnf", i + 1); + + cnf* c = readDIMACS(buf); + + sortlastnum(c, c->litcnt); + + u64 start = utime(); + i32 res = gpusolve(c); + u64 stop = utime(); + tottime += (stop - start); + + freecnf(c); + if (res == 0) passed++; + } + printf("Passed %u / 1000 tests\n", passed); + printf("Took %lf s total, %lf s on avg\n", ((f64) tottime) / 1000000.0, ((f64) tottime) / 1000000000.0); +} + +i32 runuf50() { + printf("Running against uf50\n"); + u32 passed = 0; + u64 tottime = 0; + for (u32 i = 0; i < 1000; ++i) { + char buf[128]; + i32 len = sprintf(buf, "/home/lev/Downloads/uf50/uf50-0%u.cnf", i + 1); + + cnf* c = readDIMACS(buf); + + sortlastnum(c, c->litcnt); + + u64 start = utime(); + i32 res = gpusolve(c); + u64 stop = utime(); + tottime += (stop - start); + + freecnf(c); + if (res == 0) passed++; + } + printf("Passed %u / 1000 tests\n", passed); + printf("Took %lf s total, %lf s on avg\n", ((f64) tottime) / 1000000.0, ((f64) tottime) / 1000000000.0); +} + +i32 runuuf50() { + printf("Running against uuf50\n"); + u32 passed = 0; + u64 tottime = 0; + for (u32 i = 0; i < 1000; ++i) { + char buf[128]; + i32 len = sprintf(buf, "/home/lev/Downloads/uuf50/uuf50-0%u.cnf", i + 1); + + cnf* c = readDIMACS(buf); + + sortlastnum(c, c->litcnt); + + u64 start = utime(); + i32 res = gpusolve(c); + u64 stop = utime(); + tottime += (stop - start); + + freecnf(c); + if (res == 0) passed++; + } + printf("Passed %u / 1000 tests\n", passed); + printf("Took %lf s total, %lf s on avg\n", ((f64) tottime) / 1000000.0, ((f64) tottime) / 1000000000.0); +} \ No newline at end of file diff --git a/tests/masterTest.h b/tests/masterTest.h new file mode 100644 index 0000000..62b01c4 --- /dev/null +++ b/tests/masterTest.h @@ -0,0 +1,8 @@ +#pragma once +#include "../types.h" + +i32 runTests(); + +i32 runuf20(); +i32 runuf50(); +i32 runuuf50(); \ No newline at end of file