From a1b6cdaea91772fcbdc1926600f700cf657a10fa Mon Sep 17 00:00:00 2001 From: gothictomato Date: Thu, 18 Aug 2022 22:04:52 -0400 Subject: [PATCH 1/2] Progress on early subsumption Signed-off-by: gothictomato --- CMakeLists.txt | 6 +- cnf.c | 4 +- cnf.h | 4 +- gpusolver.c | 5 + main.c | 293 ++++++++++++++++++++++++++++++++++++++++++++- ncnf.c | 210 ++++++++++++++++++++++++++++++++ ncnf.h | 25 ++++ rng.c | 121 +++++++++++++++++++ rng.h | 15 +++ tests/masterTest.c | 9 +- 10 files changed, 679 insertions(+), 13 deletions(-) create mode 100644 ncnf.c create mode 100644 ncnf.h create mode 100644 rng.c create mode 100644 rng.h diff --git a/CMakeLists.txt b/CMakeLists.txt index de2aef4..c0aa84e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,8 +2,8 @@ cmake_minimum_required(VERSION 3.22) project(psat C) set(CMAKE_C_STANDARD 99) -# set(CMAKE_C_FLAGS "-O3") +# set(CMAKE_C_FLAGS "-mavx2 -O3 -ftree-loop-linear -ftree-loop-im -ftree-loop-ivcanon -fivopts -ftree-vectorize -ftracer -funroll-all-loops ") -add_executable(psat main.c cnf.c cnf.h time.c time.h types.h gpusolver.c gpusolver.h cpusolver.c cpusolver.h tests/masterTest.c tests/masterTest.h) +add_executable(psat main.c cnf.c cnf.h time.c time.h types.h gpusolver.c gpusolver.h cpusolver.c cpusolver.h tests/masterTest.c tests/masterTest.h ncnf.c ncnf.h rng.h rng.c) -target_link_libraries(psat -lOpenCL) \ No newline at end of file +target_link_libraries(psat -lOpenCL -lgmp) \ No newline at end of file diff --git a/cnf.c b/cnf.c index b5c6710..be4aee0 100644 --- a/cnf.c +++ b/cnf.c @@ -1,5 +1,6 @@ #include "cnf.h" +/* cnf* readDIMACS(char* path) { cnf* c = malloc(sizeof(cnf)); CHECK(c, "Failed to alloc CNF struct\n"); @@ -324,4 +325,5 @@ void sortlastnum(cnf* c, u64 N) { memcpy(c->lastvars, swaparr, sizeof(u32) * c->clausecnt); free(swaparr); free(d); -} \ No newline at end of file +} + */ \ No newline at end of file diff --git a/cnf.h b/cnf.h index df8f7e6..1c63bd3 100644 --- a/cnf.h +++ b/cnf.h @@ -4,6 +4,7 @@ #include #include + #define CHECK(X, Y) if (X == NULL) { \ printf(Y); \ return NULL; \ @@ -20,6 +21,7 @@ typedef struct { u8* pars; } cnf; + cnf* readDIMACS(char* path); void printcnf(cnf* c); @@ -28,4 +30,4 @@ void printcnf(cnf* c); void sortlastnum(cnf* c, u64 N); -void freecnf(cnf* c); \ No newline at end of file +void freecnf(cnf* c); diff --git a/gpusolver.c b/gpusolver.c index 63aa37b..45a4e7a 100644 --- a/gpusolver.c +++ b/gpusolver.c @@ -176,6 +176,11 @@ i32 gpusolve(cnf* c) { exit(1); } + size_t maxworkgrpu = 0; + res = clGetKernelWorkGroupInfo(kernel, deviceid, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &maxworkgrpu, NULL); + + printf("Max work group size: %lu\n", maxworkgrpu); + res = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void*) &gpuheader); res = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void*) &gpulvars); res = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void*) &gpuvariables); diff --git a/main.c b/main.c index 9b3e030..455956b 100644 --- a/main.c +++ b/main.c @@ -1,10 +1,256 @@ #include -#include "cnf.h" -#include "gpusolver.h" +// #include "gpusolver.h" #include "time.h" -#include "tests/masterTest.h" +// #include "tests/masterTest.h" +#include "gmp.h" +#include "rng.h" +#include "ncnf.h" +#define ADD (0) +#define CMP (1) +#define CHK (2) + +void ctrthings2(cnf* c, u32* state, u32* ctr, u32* max) { + u32 wcnt = 1U + (c->cnts[0] / 32U); + u32* mode = state; + u32* index = state + 1; + u32* addval = state + 2; + + u32 varcnt = c->cnts[0] - 1; + u32 chkmsk = 0xFFFFFFFFU * (*mode == CHK); + u32 chkcls = *index & chkmsk; + u32 chkind = c->clausedat[3 * chkcls] + (*addval & chkmsk); + u32 var = c->variables[chkind]; + u8 par = c->parities[chkind]; + u32 vword = (varcnt - var) >> 5U; + u32 vbit = (varcnt - var) & 0b11111U; + u8 corpar = (ctr[vword] >> vbit) & 1U; + u8 isvalid = (par == corpar); + u8 islvar = ((*addval + 1) == c->clausedat[3 * chkcls + 1]); + if (*mode == CHK) { + // printf("> %u %u\n", *index, *addval); + // printf("%u %u %u\n", var, par, corpar); + // printf("%u %u\n", chkcls, chkind); + // printf("%u %u %u\n", islvar, isvalid, c->clausedat[3 * chkcls + 1]); + + /* + * if last var + * if valid, add + * if invalid, iterate clause + * else + * if valid, iterate addval up to len + * if invalid, iterate claus + * If current var is valid: + * if last var: + * + */ + if (isvalid) { + if (islvar) { + u32 j = c->clausedat[3 * chkcls + 2]; + *mode -= 2; + printf("j: %u\n", j); + *index = j >> 5U; + *addval = 1U << (j & 0b11111U); + } else { + *addval += 1U; + } + } else { + *addval = 0; + *index += 1U; + if (*index == c->cnts[1]) { + printf("SAT\n"); + *mode = 4; + return; + } + } + } /* else { + + //printf("YEET\n"); + if (*index >= wcnt) printf("FUCK\n"); + u32 nval = ctr[*index] + *addval; // Find the result of the current step if it was addition + *addval = (nval < ctr[*index]) * (*mode == ADD); // If in add mode, set addval to carry, else set 0 + ctr[*index] = nval * (*mode == ADD) + ctr[*index] * (*mode != ADD); // If in add mode, set new ctr val, otherwise leave unchanged + *addval -= (ctr[*index] < max[*index]) * (*mode == CMP); // If in comparison mode, decrement addval if less than + *addval += (ctr[*index] > max[*index]) * (*mode == CMP); // If in comparison mode, increment addval if greater than + bool addcond = (*addval == 0) | (*index == (wcnt - 1)); // Exit condition for the ADD state: If addval is zero (no carry) or we're at the last word + bool cmpcond = (*addval != 0) | (*index == 0); // Exit condition for the CMP state: if addval is nonzero (lt or gt) or we're at the least significant word + if (*mode == CMP && cmpcond && *addval != -1) { // If in cmpmode and the comparison result is not less than, unsat + printf("UNSAT\n"); + *mode = 4; + return; + } + bool cmpdone = cmpcond * (*mode == CMP); // if comparison completion conditions are satisfied and in CMP mode + u32 addindex = (*index + 1) * !addcond + (wcnt - 1) * addcond; // if add completion is satisfied, set index to most significant word, else increment by 1 + *index = addindex * (*mode == ADD) + (*index - 1) * (*mode == CMP); // If in add mode, use addindex; if in cmp mode, decrement index by 1 + *index *= !cmpdone; + // Leave adval alone if: + // not in add mode + // add mode isn't done + // not in cmp mode + // cmp mode isn't done + // + *addval *= !(((addcond) & (*mode != ADD)) & cmpdone); // If add is complete, zero addval, else leave unchanged + *mode += addcond * (*mode == ADD) + cmpdone; // If in add mode and add completion is reached, increment mode. If in cmp mode and cmp completion reached, increment mode. + + } + */ + + u32 cmpaddind = *index * (*mode != CHK); + if (cmpaddind >= wcnt) printf("FUCK\n"); + u32 nval = ctr[cmpaddind] + *addval; // Find the result of the current step if it was addition + *addval = (nval < ctr[cmpaddind]) * (*mode == ADD) + (*addval) * (*mode == CHK); // If in add mode, set addval to carry. If in cmp mode, set to 0. If in check mode, leave alone. + ctr[cmpaddind] = nval * (*mode == ADD) + ctr[cmpaddind] * (*mode != ADD); // If in add mode, set new ctr val, otherwise leave unchanged + *addval -= (ctr[cmpaddind] < max[cmpaddind]) * (*mode == CMP); // If in comparison mode, decrement addval if less than + *addval += (ctr[cmpaddind] > max[cmpaddind]) * (*mode == CMP); // If in comparison mode, increment addval if greater than + bool addcond = (*addval == 0) | (cmpaddind == (wcnt - 1)); // Exit condition for the ADD state: If addval is zero (no carry) or we're at the last word + bool cmpcond = (*addval != 0) | (cmpaddind == 0); // Exit condition for the CMP state: if addval is nonzero (lt or gt) or we're at the least significant word + if (*mode == CMP && cmpcond && *addval != -1) { // If in cmpmode and the comparison result is not less than, unsat + printf("UNSAT\n"); + *mode = 4; + return; + } + bool cmpdone = cmpcond & (*mode == CMP); // if comparison completion conditions are satisfied and in CMP mode + u32 addindex = (cmpaddind + 1) * !addcond + (wcnt - 1) * addcond; // if add completion is satisfied, set index to most significant word, else increment by 1 + *index = addindex * (*mode == ADD) + (*index - (*mode == CMP)) * (*mode != ADD); // If in add mode, use addindex; if in cmp mode, decrement index by 1 + *index *= !cmpdone; + // Leave adval alone if: + // not in add mode + // add mode isn't done + // not in cmp mode + // cmp mode isn't done + // + *addval *= !(((addcond) & (*mode == ADD)) | cmpdone); // If add is complete, or cmp is complete, zero. Else leave unchanged. + *mode += addcond * (*mode == ADD) + cmpdone; // If in add mode and add completion is reached, increment mode. If in cmp mode and cmp completion reached, increment mode. + +} + +void printbits(unsigned a) { + for (unsigned i = 0; i < 32; ++i) { + unsigned ind = 31 - i; + printf("%u", (a >> ind) & 1U); + } +} + +#define TESTS (274877906944LU >> 10U) +#define CSZE (83LU) +#define eqprob (0.01f) int main() { + /* + printf("Tests: %lu\n", TESTS); + rngstate rng; + u64 rseed = utime(); + seed(&rng, rseed); + printf("Seed: %lu\n", rseed); + + mpz_t a, b, c, d, e; + + mpz_inits(a, b, c, d, e, NULL); + + u32 ctrp[CSZE]; + u32 maxp[CSZE]; + + u32* ctr = ctrp; + u32* max = maxp; + + u32 state[3]; + u32 hdr[2]; + + char buf[4096]; + + + for (u64 i = 0; i < TESTS; ++i) { + memset(ctr, 0, sizeof(u32) * CSZE); + memset(max, 0, sizeof(u32) * CSZE); + + //u32 lenval = ru32(&rng) % CSZE; + u32 lenval = CSZE; + if (lenval < 2) lenval = 2; + u32 jval = ru32(&rng) % ((lenval - 1) * 32); + + for (u32 j = 0; j < lenval - 2; ++j) { + ctr[j] = ru32(&rng); + max[j] = ru32(&rng); + } + + mpz_import(a, lenval, -1, sizeof(u32), 0, 0, ctr); + mpz_import(b, lenval, -1, sizeof(u32), 0, 0, max); + + state[1] = jval >> 5U; + state[2] = jval & 0b11111U; + state[2] = 1U << state[2]; + mpz_ui_pow_ui(c, 2, jval); + + + if (rf32(&rng) < eqprob && mpz_cmp) { + mpz_sub(a, b, c); + if (mpz_sgn(a) != -1) { + mpz_export(ctr, NULL, -1, sizeof(u32), 0, 0, a); + } else { + mpz_import(a, lenval, -1, sizeof(u32), 0, 0, ctr); + } + } + + mpz_add(d, a, c); + + state[0] = 0; + + while (state[0] < 2U) { + ctrthings2(hdr, lenval, state, ctr, max); + } + + mpz_import(c, lenval, -1, sizeof(u32), 0, 0, ctr); + + + + i32 res = mpz_cmp(d, b); + + if (res == -1) { + if (state[2] != -1) { + printf("Fuck2 %lu\n", i); + printf("d: "); + mpz_out_str(stdout, 10, d); + + printf("\nb: "); + mpz_out_str(stdout, 10, b); + printf("\n"); + printf("mode: %u\n", state[2]); + exit(0); + } + } else { + if (state[2] != 2) { + printf("Fuck3 %lu\n", i); + printf("d: "); + mpz_out_str(stdout, 10, d); + printf("\nc: "); + mpz_out_str(stdout, 10, c); + printf("\nb: "); + mpz_out_str(stdout, 10, b); + printf("\n"); + printf("mode: %u\n", state[2]); + exit(0); + } + } + + res = mpz_cmp(d, c); + if (res != 0) { + printf("Fuck %lu\na: ", i); + mpz_out_str(stdout, 10, a); + printf("\nd: "); + mpz_out_str(stdout, 10, d); + printf("\nc: "); + mpz_out_str(stdout, 10, c); + printf("\n%u %u\n", lenval, jval); + exit(0); + } + } + + + + mpz_clears(a, b, c, d, e, NULL); + */ + + + /* srand( utime()); u32 cnt = 4096; @@ -28,7 +274,43 @@ int main() { */ /* Expects a path to a DIMACS file */ - cnf* c = readDIMACS("/home/lev/Downloads/uf50/uf50-088.cnf"); + + + cnf* c = readDIMACS("/home/lev/Downloads/uf20/uf20-022.cnf"); + sortlastnum(c); + + // printcnf(c); + u32 wcnt = 1U + (c->cnts[0] / 32U); + + + u32* ctr = calloc(wcnt, sizeof(u32)); + u32* max = calloc(wcnt, sizeof(u32)); + + max[c->cnts[0] >> 5U] = 1U << (c->cnts[0] & 0b11111U); + + u32 state[3]; + + state[0] = 2; + state[1] = state[2] = 0; + + + u32 mtr = 0; + while (state[0] < 3U) { + u32 cmd = state[0]; + ctrthings2(c, state, ctr, max); + if (state[0] != 2 && cmd == 2) { + //printf("\n"); + //for (unsigned i = wcnt - 1; i < wcnt; --i) printbits(ctr[i]); + //printf("\n"); + //mtr++; + //if (mtr == 10) exit(15); + // printf("%u %u %u\n", state[0], state[1], state[2]); + } + } + + return 0; + /* + // printf("%u\n", c->litcnt); // for (u32 i = 0; i < c->clausecnt; ++i) printf("%u ", c->lastvars[i]); @@ -46,5 +328,6 @@ int main() { // if (res == 1) break; freecnf(c); - return 0; + return 0;*/ + } diff --git a/ncnf.c b/ncnf.c new file mode 100644 index 0000000..37b88b4 --- /dev/null +++ b/ncnf.c @@ -0,0 +1,210 @@ +#include "ncnf.h" + +cnf* readDIMACS(char* path) { + cnf* c = malloc(sizeof(cnf)); + CHECK(c, "Failed to alloc CNF struct\n") + + FILE* f = fopen(path, "r"); + CHECK(f, "Failed to open file\n") + + u32 bufsize = 1000; + char* buf = malloc(sizeof(char) * bufsize); + CHECK(buf, "Failed to alloc read buffer\n") + + CHECK(fgets(buf, sizeof(char) * bufsize, f), "Failed to read file\n") + + // 'c' marks the beginning of a comment line in DIMACs. Skip this line + while (buf[0] == 'c') { + CHECK(fgets(buf, sizeof(char) * bufsize, f), "Failed to read file\n") + } + + // The first non-comment line is the header, should have format 'p cnf [varcnt] [clausecnt] + // We skip the first bit by iterating and skipping any character that's p, c, n, f, or space + char* temp = buf; + while (*temp == ' ' || *temp == 'p' || *temp == 'c' || *temp == 'n' || *temp == 'f') { + temp++; + } + + // Char by char we read in the number of variables + u32* varcnt = &(c->cnts[0]); + *varcnt = 0; + while (((u8) (*temp - '0')) < 10) { + *varcnt *= 10; + *varcnt += (*temp - '0'); + temp++; + } + + // Skip any trailing whitespace + while (*temp == ' ') temp++; + + // Read in clausecnt + u32* clausecnt = &(c->cnts[1]); + *clausecnt = 0; + while (((u8) (*temp - '0')) < 10) { + *clausecnt *= 10; + *clausecnt += (*temp - '0'); + temp++; + } + + // Resize line buffer: the maximum clause size is varcnt, the maximum size of a u32 in decimal + // is 10, and conservatively given 2 extra characters for the - symbol and space delimiting, + // resizing the buffer to this should mean we never encounter a clause we fail to read. + u32 nsize = sizeof(char) * ((*varcnt) * 12LU) + 10; + char* nbuf = realloc(buf, nsize); + CHECK(nbuf, "Failed to realloc read buffer\n") + + buf = nbuf; + + u32 cnt = 0; + u32 cap = 32; + + c->clausedat = calloc(*clausecnt, sizeof(u32) * 3); + CHECK(c->clausedat, "Failed to allocate clause data\n") + c->variables = calloc(cap, sizeof(u32)); + CHECK(c->variables, "Failed to allocate literal variables\n") + c->parities = calloc(cap, sizeof(u8)); + CHECK(c->parities, "Failed to allocate literal parities\n"); + + for (u32 i = 0; i < *clausecnt; ++i) { + c->clausedat[3 * i] = cnt; + CHECK(fgets(buf, nsize, f), "Failed to read clause\n"); + + temp = buf; + while (*temp == ' ') temp++; + + bool empty = true; + bool tr = true; + while (*temp != '\n') { + if (cnt == cap) { + // Out of space to add more literals, realloc arrays + u32 ncap = cap << 1U; + c->variables = realloc(c->variables, sizeof(u32) * ncap); + memset(c->variables + cap, 0, sizeof(u32) * cap); + CHECK(c->variables, "Failed to realloc variable array\n"); + c->parities = realloc(c->parities, sizeof(u8) * ncap); + memset(c->parities + cap, 0, sizeof(u8) * cap); + CHECK(c->parities, "Failed to realloc parity array\n"); + cap = ncap; + } + + if (*temp == '-') { + tr = false; + } else if (((u8) (*temp - '0')) < 10) { + // Read in the literal's digits + c->variables[cnt] *= 10; + c->variables[cnt] += (*temp - '0'); + } else { + // Skip any whitespace and pack the read value into the arrays, along with + // any additional data + while (temp[1] == ' ') temp++; + c->parities[cnt] = tr; + c->variables[cnt] -= 1; + if (c->clausedat[3 * i + 2] < c->variables[cnt]) c->clausedat[3 * i + 2] = c->variables[cnt]; + empty = false; + tr = true; + cnt++; + if (temp[1] == '0') { + break; + } + } + temp++; + } + + if (empty) { + printf("UNSAT: Empty clause %u\n", i); + return NULL; + } + c->clausedat[3 * i + 1] = cnt - c->clausedat[3 * i]; + } + + // Lastvars is set to the index of the last variable of each clause. However, because in the + // counter, the last variable has the lsb position, we flip the value of lastvars + for (u32 i = 0; i < *clausecnt; ++i) { + c->clausedat[3 * i + 2] += 1U; + c->clausedat[3 * i + 2] = *varcnt - c->clausedat[3 * i + 2]; + } + + // Realloc the arrays to exactly match the number of literals + c->variables = realloc(c->variables, sizeof(u32) * cnt); + c->parities = realloc(c->parities, sizeof(u8) * cnt); + free(buf); + if (fclose(f)) { + printf("Failed to close file\n"); + return NULL; + } + return c; +} + +void printcnf(cnf* c) { + printf("p cnf %u %u\n", c->cnts[0], c->cnts[1]); + for (u32 i = 0; i < c->cnts[1]; ++i) { + for (u32 j = 0; j < c->clausedat[3 * i + 1]; ++j) { + if (c->parities[c->clausedat[3 * i] + j] == 0) printf("-"); + printf("%u ", c->variables[c->clausedat[3 * i] + j] + 1); + } + printf("0\n"); + } +} + +void freecnf(cnf* c) { + free(c->clausedat); + free(c->variables); + free(c->parities); + free(c); +} + +const u64 RBITS = 8; +const u64 RSIZE = 1LU << RBITS; +const u64 RLVLS = (31LU / RBITS) + 1; +const u64 RMASK = RSIZE - 1; + +void sortlastnum(cnf* c) { + bool v = false; + u32* d = calloc(c->cnts[1], sizeof(u32) * 3); + + u32 qb[RLVLS * RSIZE]; + u32 qe[RLVLS * RSIZE]; + memset(qb, 0, sizeof(u32) * RLVLS * RSIZE); + memset(qe, 0, sizeof(u32) * RLVLS * RSIZE); + + for (u32 pass = 0; pass < RLVLS; ++pass) { + u32 shift = pass * RBITS; + + for (u32 i = 0; i < c->cnts[1]; ++i) { + u32 val = UINT32_MAX - c->clausedat[3 * i + 2]; + u32 ind = (val >> shift) & RMASK; + qe[pass * RSIZE + ind]++; + } + + u32 uc = 0; + for (u32 i = 0; i < RSIZE; ++i) if (qe[pass * RSIZE + i]) uc++; + if (uc == 1) continue; + + qb[pass * RSIZE] = 0; + for (u32 i = 1; i < RSIZE; ++i) { + qb[pass * RSIZE + i] = qb[pass * RSIZE + i - 1] + qe[pass * RSIZE + i - 1]; + } + + for (u32 i = 0; i < c->cnts[1]; ++i) { + u32 val = UINT32_MAX - c->clausedat[3 * i + 2]; + u32 ind = (val >> shift) & RMASK; + memcpy(d + 3 * qb[pass * RSIZE + ind], c->clausedat + 3 * i, sizeof(u32) * 3); + qb[pass * RSIZE + ind]++; + __builtin_prefetch(d + 3 * qb[pass * RSIZE + ind] + 1); + } + + u32* tptr = c->clausedat; + c->clausedat = d; + d = tptr; + v = !v; + } + + if (v) { + u32* tptr = c->clausedat; + c->clausedat = d; + d = tptr; + memcpy(c->clausedat, d, sizeof(u32) * 3 * c->cnts[1]); + } + + free(d); +} diff --git a/ncnf.h b/ncnf.h new file mode 100644 index 0000000..c61d80f --- /dev/null +++ b/ncnf.h @@ -0,0 +1,25 @@ +#pragma once +#include "types.h" +#include +#include +#include + +#define CHECK(X, Y) if (X == NULL) { \ + printf(Y); \ + return NULL; \ +} + +typedef struct { + u32 cnts[2]; // { varcnt, clausecnt } + u32* clausedat; // { ind, len, jval } + u32* variables; + u8* parities; +} cnf; + +cnf* readDIMACS(char* path); + +void printcnf(cnf* c); + +void sortlastnum(cnf* c); + +void freecnf(cnf* c); diff --git a/rng.c b/rng.c new file mode 100644 index 0000000..07da3ee --- /dev/null +++ b/rng.c @@ -0,0 +1,121 @@ +#include "rng.h" +#include +#include "time.h" +#include "stdlib.h" + +static inline u64 rotl(const u64 x, i32 k) { + return (x << k) | (x >> (64 - k)); +} + +void seed(rngstate* s, u64 seed) { + /* Unrolled & modified version of splitmix64 + * https://prng.di.unimi.it/splitmix64.c */ + u64 z = (seed += 0x9e3779b97f4a7c15); + z = (z ^ (z >> 30)) * 0xbf58476d1ce4e5b9; + z = (z ^ (z >> 27)) * 0x94d049bb133111eb; + s->state[0] = z ^ (z >> 31); + + z = (seed += 0x9e3779b97f4a7c15); + z = (z ^ (z >> 30)) * 0xbf58476d1ce4e5b9; + z = (z ^ (z >> 27)) * 0x94d049bb133111eb; + s->state[1] = z ^ (z >> 31); + + z = (seed += 0x9e3779b97f4a7c15); + z = (z ^ (z >> 30)) * 0xbf58476d1ce4e5b9; + z = (z ^ (z >> 27)) * 0x94d049bb133111eb; + s->state[2] = z ^ (z >> 31); + + z = (seed += 0x9e3779b97f4a7c15); + z = (z ^ (z >> 30)) * 0xbf58476d1ce4e5b9; + z = (z ^ (z >> 27)) * 0x94d049bb133111eb; + s->state[3] = z ^ (z >> 31); + + z = (seed += 0x9e3779b97f4a7c15); + z = (z ^ (z >> 30)) * 0xbf58476d1ce4e5b9; + z = (z ^ (z >> 27)) * 0x94d049bb133111eb; + s->state[4] = z ^ (z >> 31); + + z = (seed += 0x9e3779b97f4a7c15); + z = (z ^ (z >> 30)) * 0xbf58476d1ce4e5b9; + z = (z ^ (z >> 27)) * 0x94d049bb133111eb; + s->state[5] = z ^ (z >> 31); + + z = (seed += 0x9e3779b97f4a7c15); + z = (z ^ (z >> 30)) * 0xbf58476d1ce4e5b9; + z = (z ^ (z >> 27)) * 0x94d049bb133111eb; + s->state[6] = z ^ (z >> 31); + + z = (seed + 0x9e3779b97f4a7c15); + z = (z ^ (z >> 30)) * 0xbf58476d1ce4e5b9; + z = (z ^ (z >> 27)) * 0x94d049bb133111eb; + s->state[7] = z ^ (z >> 31); + + s->ready = true; +} + +u64 ru64(rngstate* s) { + if (!s->ready) seed(s, utime()); + const u64 res = rotl(s->state[1] * 5, 7) * 9; + const u64 t = s->state[1] << 11U; + + s->state[2] ^= s->state[0]; + s->state[5] ^= s->state[1]; + s->state[1] ^= s->state[2]; + s->state[7] ^= s->state[3]; + s->state[3] ^= s->state[4]; + s->state[4] ^= s->state[5]; + s->state[0] ^= s->state[6]; + s->state[6] ^= s->state[7]; + + s->state[6] ^= t; + + s->state[7] = rotl(s->state[7], 21); + + return res; +} + +u64 ru64pow(rngstate* s, u64 max) { + u64 v = ru64(s) % max; + v++; + return max / v; +} + +u32 ru32(rngstate* s) { + return (u32) ru64(s); +} + +f64 rf64(rngstate* s) { + return ((f64) ru64(s)) / ((f64) UINT64_MAX); +} + +f32 rf32(rngstate* s) { + return ((f32) ru32(s)) / ((f32) UINT32_MAX); +} + +void jump(rngstate* s) { + static const u64 JUMP[] = { 0x33ed89b6e7a353f9, 0x760083d7955323be, 0x2837f2fbb5f22fae, 0x4b8c5674d309511c, 0xb11ac47a7ba28c25, 0xf1be7667092bcc1c, 0x53851efdb6df0aaf, 0x1ebbc8b23eaf25db }; + u64 t[8]; + memset(t, 0, sizeof(u64) * 8); + for (i32 i = 0; i < 8; ++i) { + for (i32 b = 0; b < 64; ++b) { + if (JUMP[i] & 1LU << b) for (i32 w = 0; w < 8; ++w) t[w] ^= s->state[w]; + ru64(s); + } + } + + memcpy(s->state, t, sizeof(u64) * 8); +} + +void longjump(rngstate* s) { + static const u64 LONGJUMP[] = { 0x11467fef8f921d28, 0xa2a819f2e79c8ea8, 0xa8299fc284b3959a, 0xb4d347340ca63ee1, 0x1cb0940bedbff6ce, 0xd956c5c4fa1f8e17, 0x915e38fd4eda93bc, 0x5b3ccdfa5d7daca5 }; + u64 t[8]; + memset(t, 0, sizeof(u64) * 8); + for (i32 i = 0; i < 8; ++i) { + for (i32 b = 0; b < 64; ++b) { + if (LONGJUMP[i] & 1LU << b) for (i32 w = 0; w < 8; ++w) t[w] ^= s->state[w]; + ru64(s); + } + } + + memcpy(s->state, t, sizeof(u64) * 8); +} \ No newline at end of file diff --git a/rng.h b/rng.h new file mode 100644 index 0000000..c4351dd --- /dev/null +++ b/rng.h @@ -0,0 +1,15 @@ +#pragma once +#include "types.h" + +typedef struct { + bool ready; + u64 state[8]; +} rngstate; + +void seed(rngstate* s, u64 seed); +u64 ru64(rngstate* s); +u32 ru32(rngstate* s); +f64 rf64(rngstate* s); +f32 rf32(rngstate* s); +void jump(rngstate* s); +u64 ru64pow(rngstate* s, u64 max); \ No newline at end of file diff --git a/tests/masterTest.c b/tests/masterTest.c index 2868e17..471d369 100644 --- a/tests/masterTest.c +++ b/tests/masterTest.c @@ -33,7 +33,8 @@ i32 runuf20() { cnf* c = readDIMACS(buf); - sortlastnum(c, c->litcnt); + // TODO: Uncomment + // sortlastnum(c, c->litcnt); u64 start = utime(); i32 res = gpusolve(c); @@ -59,7 +60,8 @@ i32 runuf50() { cnf* c = readDIMACS(buf); - sortlastnum(c, c->litcnt); + // TODO: Uncomment + // sortlastnum(c, c->litcnt); u64 start = utime(); i32 res = gpusolve(c); @@ -85,7 +87,8 @@ i32 runuuf50() { cnf* c = readDIMACS(buf); - sortlastnum(c, c->litcnt); + // TODO: Uncomment + // sortlastnum(c, c->litcnt); u64 start = utime(); i32 res = gpusolve(c); From 4c111f4b6f0c73410ef88cf4ee030577c30804b7 Mon Sep 17 00:00:00 2001 From: gothictomato Date: Sun, 21 Aug 2022 12:49:48 -0400 Subject: [PATCH 2/2] Early subsumption complete Signed-off-by: gothictomato --- cnf.h | 2 + gpusolver.c | 108 +++++++++++++------------ gpusolver.h | 2 +- main.c | 178 ++++++++++++++++------------------------- ncnf.c | 1 + ncnf.h | 2 +- psat.cl | 192 +++++++++++++++++++++++---------------------- tests/masterTest.c | 115 +++++++++++++-------------- 8 files changed, 277 insertions(+), 323 deletions(-) diff --git a/cnf.h b/cnf.h index 1c63bd3..bd85b57 100644 --- a/cnf.h +++ b/cnf.h @@ -3,6 +3,7 @@ #include #include #include +/* #define CHECK(X, Y) if (X == NULL) { \ @@ -31,3 +32,4 @@ void sortlastnum(cnf* c, u64 N); void freecnf(cnf* c); +*/ \ No newline at end of file diff --git a/gpusolver.c b/gpusolver.c index 45a4e7a..1709c14 100644 --- a/gpusolver.c +++ b/gpusolver.c @@ -1,9 +1,11 @@ #include "gpusolver.h" #include #include "time.h" +#include "gmp.h" -#define GLOBAL_SIZE (256) -#define LOCAL_SIZE (GLOBAL_SIZE) + +#define LOCAL_SIZE (128) +#define GLOBAL_SIZE (1024) #define CHECKASGN (true) @@ -28,7 +30,7 @@ i32 gpusolve(cnf* c) { source_size = fread( source_str, 1, 0x100000, fp); fclose( fp ); - u32 wordcnt = 1 + ((c->varcnt) >> 5U); + u32 wordcnt = 1 + ((c->cnts[0]) >> 5U); u32* solution = calloc((wordcnt + 1), sizeof(u32)); if (solution == NULL) { @@ -36,6 +38,13 @@ i32 gpusolve(cnf* c) { exit(1); } + mpz_t gmpmax; + mpz_init(gmpmax); + mpz_ui_pow_ui(gmpmax, 2, c->cnts[0]); + mpz_div_ui(gmpmax, gmpmax, GLOBAL_SIZE); + mpz_export(solution + 1, NULL, -1, sizeof(u32), 0, 0, gmpmax); + mpz_clear(gmpmax); + cl_int res = clGetPlatformIDs(1, &platformid, &numplatforms); if (res != CL_SUCCESS) { printf("Failed to retrieve OpenCL platform IDs\n"); @@ -74,38 +83,27 @@ i32 gpusolve(cnf* c) { */ // TODO: Look into DMA, maybe? Could do clause learning CPU-side and just update the GPU buffer - cl_mem gpuheader = clCreateBuffer(context, CL_MEM_READ_ONLY, 3 * sizeof(cl_uint), NULL, &res); + cl_mem gpuheader = clCreateBuffer(context, CL_MEM_READ_ONLY, 2 * sizeof(cl_uint), NULL, &res); if (res != CL_SUCCESS) { printf("Failed to create CNF header buffer\n"); exit(1); } - cl_mem gpulvars = clCreateBuffer(context, CL_MEM_READ_ONLY, c->clausecnt * sizeof(cl_uint), NULL, &res); + cl_mem gpulvars = clCreateBuffer(context, CL_MEM_READ_ONLY, 3 * c->cnts[1] * sizeof(cl_uint), NULL, &res); if (res != CL_SUCCESS) { printf("Failed to create CNF lvar buffer\n"); exit(1); } - cl_mem gpuvariables = clCreateBuffer(context, CL_MEM_READ_ONLY, c->litcnt * sizeof(cl_uint), NULL, &res); + cl_mem gpuvariables = clCreateBuffer(context, CL_MEM_READ_ONLY, c->cnts[2] * sizeof(cl_uint), NULL, &res); if (res != CL_SUCCESS) { printf("Failed to create CNF variable buffer\n"); exit(1); } - cl_mem gpuclauses = clCreateBuffer(context, CL_MEM_READ_ONLY, c->litcnt * sizeof(cl_uint), NULL, &res); - if (res != CL_SUCCESS) { - printf("Failed to create CNF clause buffer\n"); - exit(1); - } - cl_mem gpuparities = clCreateBuffer(context, CL_MEM_READ_ONLY, c->litcnt * sizeof(cl_uchar), NULL, &res); + cl_mem gpuparities = clCreateBuffer(context, CL_MEM_READ_ONLY, c->cnts[2] * sizeof(cl_uchar), NULL, &res); if (res != CL_SUCCESS) { printf("Failed to create CNF parity buffer\n"); exit(1); } - // Allocate scratchpad memory - cl_mem gpuscratchpad = clCreateBuffer(context, CL_MEM_READ_WRITE, c->clausecnt * sizeof(cl_uchar), NULL, &res); - if (res != CL_SUCCESS) { - printf("Failed to create CNF subsumption scratchpad buffer\n"); - exit(1); - } /* cl_mem gpumaxvals = clCreateBuffer(context, CL_MEM_READ_WRITE, GLOBAL_SIZE * sizeof(cl_uint), NULL, &res); if (res != CL_SUCCESS) { @@ -120,30 +118,29 @@ i32 gpusolve(cnf* c) { exit(1); } - u32 cnfheader[3] = { c->litcnt, c->varcnt, c->clausecnt }; - // Load buffers to GPU - res = clEnqueueWriteBuffer(commqueue, gpuheader, CL_TRUE, 0, 3 * sizeof(cl_uint), cnfheader, 0, NULL, NULL); + res = clEnqueueWriteBuffer(commqueue, gpuheader, CL_TRUE, 0, 2 * sizeof(cl_uint), c->cnts, 0, NULL, NULL); if (res != CL_SUCCESS) { printf("Failed to queue CNF header write\n"); exit(1); } - res = clEnqueueWriteBuffer(commqueue, gpulvars, CL_TRUE, 0, c->clausecnt * sizeof(cl_uint), c->lastvars, 0, NULL, NULL); + res = clEnqueueWriteBuffer(commqueue, gpulvars, CL_TRUE, 0, 3 * c->cnts[1] * sizeof(cl_uint), c->clausedat, 0, NULL, NULL); if (res != CL_SUCCESS) { printf("Failed to queue CNF lvar write\n"); exit(1); } - res = clEnqueueWriteBuffer(commqueue, gpuvariables, CL_TRUE, 0, c->litcnt * sizeof(cl_uint), c->variables, 0, NULL, NULL); + res = clEnqueueWriteBuffer(commqueue, gpuvariables, CL_TRUE, 0, c->cnts[2] * sizeof(cl_uint), c->variables, 0, NULL, NULL); if (res != CL_SUCCESS) { printf("Failed to queue CNF variable write\n"); exit(1); } - res = clEnqueueWriteBuffer(commqueue, gpuclauses, CL_TRUE, 0, c->litcnt * sizeof(cl_uint), c->clauses, 0, NULL, NULL); + res = clEnqueueWriteBuffer(commqueue, gpuparities, CL_TRUE, 0, c->cnts[2] * sizeof(cl_uchar), c->parities, 0, NULL, NULL); if (res != CL_SUCCESS) { - printf("Failed to queue CNF clause write\n"); + printf("Failed to queue CNF parity write\n"); exit(1); } - res = clEnqueueWriteBuffer(commqueue, gpuparities, CL_TRUE, 0, c->litcnt * sizeof(cl_uchar), c->pars, 0, NULL, NULL); + + res = clEnqueueWriteBuffer(commqueue, gpuoutput, CL_TRUE, 0, (wordcnt + 1) * sizeof(cl_uint), solution, 0, NULL, NULL); if (res != CL_SUCCESS) { printf("Failed to queue CNF parity write\n"); exit(1); @@ -179,18 +176,16 @@ i32 gpusolve(cnf* c) { size_t maxworkgrpu = 0; res = clGetKernelWorkGroupInfo(kernel, deviceid, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &maxworkgrpu, NULL); - printf("Max work group size: %lu\n", maxworkgrpu); + // printf("Max work group size: %lu\n", maxworkgrpu); res = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void*) &gpuheader); res = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void*) &gpulvars); res = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void*) &gpuvariables); - res = clSetKernelArg(kernel, 3, sizeof(cl_mem), (void*) &gpuclauses); - res = clSetKernelArg(kernel, 4, sizeof(cl_mem), (void*) &gpuparities); + res = clSetKernelArg(kernel, 3, sizeof(cl_mem), (void*) &gpuparities); - res = clSetKernelArg(kernel, 5, sizeof(cl_mem), (void*) &gpuoutput); + res = clSetKernelArg(kernel, 4, sizeof(cl_mem), (void*) &gpuoutput); - res = clSetKernelArg(kernel, 6, sizeof(cl_mem), (void*) &gpuscratchpad); - res = clSetKernelArg(kernel, 7, LOCAL_SIZE * sizeof(cl_uint), NULL); + res = clSetKernelArg(kernel, 5, 2 * wordcnt * sizeof(cl_uint) * LOCAL_SIZE, NULL); // u64 starttime = utime(); size_t itemsize[2] = {GLOBAL_SIZE, LOCAL_SIZE }; @@ -207,34 +202,39 @@ i32 gpusolve(cnf* c) { } // u64 endtime = utime(); - if (solution[0] == 1) { + if (solution[0] == 0) { printf("UNSAT\n"); - } else if (solution[0] == 0) { - printf("SAT\n"); - for (u32 k = 0; k < c->varcnt; ++k) { - u32 vind = (c->varcnt - 1) - k; + } else if (solution[0] == 1) { + printf("SAT: "); + for (u32 k = 0; k < c->cnts[0]; ++k) { + u32 vind = (c->cnts[0] - 1) - k; u32 iind = vind >> 5U; u32 bind = vind & 0b11111U; u8 par = (solution[iind + 1] >> bind) & 1U; printf("%u", par); } - printf("\n"); if (CHECKASGN) { - u8* assigncheck = calloc(c->clausecnt, sizeof(u8)); - for (u32 i = 0; i < c->litcnt; ++i) { - u32 g = ((c->varcnt - 1) - c->variables[i]) >> 5U; - u32 h = ((c->varcnt - 1) - c->variables[i]) & 0b11111U; - u8 paract = (solution[g + 1] >> h) & 1U; - if (c->pars[i] == paract) assigncheck[c->clauses[i]] = true; - } - for (u32 i = 0; i < c->clausecnt; ++i) { - if (!assigncheck[i]) { - printf("Failed assignment check\n"); - solution[0] = 4; + u8 checkres = 0; + for (u32 i = 0; i < c->cnts[1]; ++i) { + checkres = 0; + for (u32 j = 0; j < c->clausedat[3 * i + 1]; ++j) { + u32 v = c->variables[c->clausedat[3 * i] + j]; + u32 vv = c->cnts[0] - 1; + u32 g = (vv - v) >> 5U; + u32 h = (vv - v) & 0b11111U; + u8 paract = (solution[g + 1] >> h) & 1U; + if (c->parities[c->clausedat[3 * i] + j] == paract) { + checkres = 1; + break; + } } + if (!checkres) break; + } + if (checkres) { + printf(" \xE2\x9C\x93\n"); + } else { + printf(" -\n"); } - free(assigncheck); - printf("Passed assignment check\n"); } } else { printf("What the fuck???\n"); @@ -253,16 +253,14 @@ i32 gpusolve(cnf* c) { res = clReleaseMemObject(gpuheader); res = clReleaseMemObject(gpulvars); res = clReleaseMemObject(gpuvariables); - res = clReleaseMemObject(gpuclauses); res = clReleaseMemObject(gpuparities); res = clReleaseMemObject(gpuoutput); - res = clReleaseMemObject(gpuscratchpad); res = clReleaseDevice(deviceid); - i32 retval = solution[0]; + i32 retval = (i32) solution[0]; free(solution); free(source_str); return retval; -} \ No newline at end of file +} diff --git a/gpusolver.h b/gpusolver.h index 65e3728..a1d91a1 100644 --- a/gpusolver.h +++ b/gpusolver.h @@ -1,4 +1,4 @@ #pragma once -#include "cnf.h" +#include "ncnf.h" i32 gpusolve(cnf* c); \ No newline at end of file diff --git a/main.c b/main.c index 455956b..21663ee 100644 --- a/main.c +++ b/main.c @@ -1,7 +1,7 @@ #include -// #include "gpusolver.h" +#include "gpusolver.h" #include "time.h" -// #include "tests/masterTest.h" +#include "tests/masterTest.h" #include "gmp.h" #include "rng.h" #include "ncnf.h" @@ -10,8 +10,9 @@ #define CMP (1) #define CHK (2) + void ctrthings2(cnf* c, u32* state, u32* ctr, u32* max) { - u32 wcnt = 1U + (c->cnts[0] / 32U); + u32 wcnt = 1U + (c->cnts[0] >> 5U); u32* mode = state; u32* index = state + 1; u32* addval = state + 2; @@ -27,101 +28,40 @@ void ctrthings2(cnf* c, u32* state, u32* ctr, u32* max) { u8 corpar = (ctr[vword] >> vbit) & 1U; u8 isvalid = (par == corpar); u8 islvar = ((*addval + 1) == c->clausedat[3 * chkcls + 1]); - if (*mode == CHK) { - // printf("> %u %u\n", *index, *addval); - // printf("%u %u %u\n", var, par, corpar); - // printf("%u %u\n", chkcls, chkind); - // printf("%u %u %u\n", islvar, isvalid, c->clausedat[3 * chkcls + 1]); - - /* - * if last var - * if valid, add - * if invalid, iterate clause - * else - * if valid, iterate addval up to len - * if invalid, iterate claus - * If current var is valid: - * if last var: - * - */ - if (isvalid) { - if (islvar) { - u32 j = c->clausedat[3 * chkcls + 2]; - *mode -= 2; - printf("j: %u\n", j); - *index = j >> 5U; - *addval = 1U << (j & 0b11111U); - } else { - *addval += 1U; - } - } else { - *addval = 0; - *index += 1U; - if (*index == c->cnts[1]) { - printf("SAT\n"); - *mode = 4; - return; - } - } - } /* else { - - //printf("YEET\n"); - if (*index >= wcnt) printf("FUCK\n"); - u32 nval = ctr[*index] + *addval; // Find the result of the current step if it was addition - *addval = (nval < ctr[*index]) * (*mode == ADD); // If in add mode, set addval to carry, else set 0 - ctr[*index] = nval * (*mode == ADD) + ctr[*index] * (*mode != ADD); // If in add mode, set new ctr val, otherwise leave unchanged - *addval -= (ctr[*index] < max[*index]) * (*mode == CMP); // If in comparison mode, decrement addval if less than - *addval += (ctr[*index] > max[*index]) * (*mode == CMP); // If in comparison mode, increment addval if greater than - bool addcond = (*addval == 0) | (*index == (wcnt - 1)); // Exit condition for the ADD state: If addval is zero (no carry) or we're at the last word - bool cmpcond = (*addval != 0) | (*index == 0); // Exit condition for the CMP state: if addval is nonzero (lt or gt) or we're at the least significant word - if (*mode == CMP && cmpcond && *addval != -1) { // If in cmpmode and the comparison result is not less than, unsat - printf("UNSAT\n"); - *mode = 4; - return; - } - bool cmpdone = cmpcond * (*mode == CMP); // if comparison completion conditions are satisfied and in CMP mode - u32 addindex = (*index + 1) * !addcond + (wcnt - 1) * addcond; // if add completion is satisfied, set index to most significant word, else increment by 1 - *index = addindex * (*mode == ADD) + (*index - 1) * (*mode == CMP); // If in add mode, use addindex; if in cmp mode, decrement index by 1 - *index *= !cmpdone; - // Leave adval alone if: - // not in add mode - // add mode isn't done - // not in cmp mode - // cmp mode isn't done - // - *addval *= !(((addcond) & (*mode != ADD)) & cmpdone); // If add is complete, zero addval, else leave unchanged - *mode += addcond * (*mode == ADD) + cmpdone; // If in add mode and add completion is reached, increment mode. If in cmp mode and cmp completion reached, increment mode. - - } - */ + u8 isbchk0 = (*mode == CHK); + u8 isbchk1 = isbchk0 & isvalid; + u8 isbchk2 = isbchk1 & islvar; + u32 j = c->clausedat[3 * chkcls + 2]; + *mode -= 2 * isbchk2; + *index = (j >> 5U) * isbchk2 + *index * (!isbchk2); + *addval = (1U << (j & 0b11111U)) * isbchk2 + *addval * (!isbchk2); + *addval += ((isbchk1) & (!islvar)); + u8 isbchk3 = (isbchk0 & (!isvalid)); + *addval *= (!isbchk3); + *index += (isbchk3); + u8 issat = (*index == c->cnts[1]) * (isbchk3); u32 cmpaddind = *index * (*mode != CHK); - if (cmpaddind >= wcnt) printf("FUCK\n"); u32 nval = ctr[cmpaddind] + *addval; // Find the result of the current step if it was addition *addval = (nval < ctr[cmpaddind]) * (*mode == ADD) + (*addval) * (*mode == CHK); // If in add mode, set addval to carry. If in cmp mode, set to 0. If in check mode, leave alone. - ctr[cmpaddind] = nval * (*mode == ADD) + ctr[cmpaddind] * (*mode != ADD); // If in add mode, set new ctr val, otherwise leave unchanged + ctr[cmpaddind] = nval * ((*mode == ADD) & !issat) + ctr[cmpaddind] * ((*mode != ADD) | issat); // If in add mode, set new ctr val, otherwise leave unchanged *addval -= (ctr[cmpaddind] < max[cmpaddind]) * (*mode == CMP); // If in comparison mode, decrement addval if less than *addval += (ctr[cmpaddind] > max[cmpaddind]) * (*mode == CMP); // If in comparison mode, increment addval if greater than - bool addcond = (*addval == 0) | (cmpaddind == (wcnt - 1)); // Exit condition for the ADD state: If addval is zero (no carry) or we're at the last word - bool cmpcond = (*addval != 0) | (cmpaddind == 0); // Exit condition for the CMP state: if addval is nonzero (lt or gt) or we're at the least significant word - if (*mode == CMP && cmpcond && *addval != -1) { // If in cmpmode and the comparison result is not less than, unsat - printf("UNSAT\n"); + u8 addcond = (*addval == 0) | (cmpaddind == (wcnt - 1)); // Exit condition for the ADD state: If addval is zero (no carry) or we're at the last word + u8 cmpcond = (*addval != 0) | (cmpaddind == 0); // Exit condition for the CMP state: if addval is nonzero (lt or gt) or we're at the least significant word + u8 exittime = (*mode == CMP) & cmpcond & (*addval != -1); + exittime |= issat; + if (exittime) { // If in cmpmode and the comparison result is not less than, unsat + printf("Result: %u\n", issat); *mode = 4; return; } - bool cmpdone = cmpcond & (*mode == CMP); // if comparison completion conditions are satisfied and in CMP mode + u8 cmpdone = cmpcond & (*mode == CMP); // if comparison completion conditions are satisfied and in CMP mode u32 addindex = (cmpaddind + 1) * !addcond + (wcnt - 1) * addcond; // if add completion is satisfied, set index to most significant word, else increment by 1 *index = addindex * (*mode == ADD) + (*index - (*mode == CMP)) * (*mode != ADD); // If in add mode, use addindex; if in cmp mode, decrement index by 1 *index *= !cmpdone; - // Leave adval alone if: - // not in add mode - // add mode isn't done - // not in cmp mode - // cmp mode isn't done - // *addval *= !(((addcond) & (*mode == ADD)) | cmpdone); // If add is complete, or cmp is complete, zero. Else leave unchanged. *mode += addcond * (*mode == ADD) + cmpdone; // If in add mode and add completion is reached, increment mode. If in cmp mode and cmp completion reached, increment mode. - } void printbits(unsigned a) { @@ -134,6 +74,38 @@ void printbits(unsigned a) { #define TESTS (274877906944LU >> 10U) #define CSZE (83LU) #define eqprob (0.01f) + + +void mul(u32* c, u32 len, u32* a, u32 b) { + u32 carry = 0; + for (u32 i = 0; i < len; ++i) { + u32 ncarry; + u32 blo = a[i] & 0xFFFFU; + u32 bhi = a[i] >> 16U; + u32 ilo = b & 0xFFFFU; + u32 ihi = b >> 16U; + + *(c + i) = ilo * blo; + u32 b1 = ilo * bhi; + u32 c1 = ihi * blo; + ncarry = ihi * bhi; + + b1 += c1; + ncarry += (b1 < c1) << 16U; + u32 bblo = b1 & 0xFFFFU; + u32 bbhi = b1 >> 16U; + bblo <<= 16U; + *(c + i) += bblo; + u8 acarry = *(c + i) < bblo; + + ncarry += bbhi + acarry; + c[i] += carry; + ncarry += c[i] < carry; + carry = ncarry; + } +} + + int main() { /* printf("Tests: %lu\n", TESTS); @@ -276,38 +248,20 @@ int main() { /* Expects a path to a DIMACS file */ - cnf* c = readDIMACS("/home/lev/Downloads/uf20/uf20-022.cnf"); + + + /* + cnf* c = readDIMACS("/home/lev/Downloads/logistics/logistics.d.cnf"); sortlastnum(c); - // printcnf(c); - u32 wcnt = 1U + (c->cnts[0] / 32U); + printf("%u\n", c->cnts[0]); + gpusolve(c); - u32* ctr = calloc(wcnt, sizeof(u32)); - u32* max = calloc(wcnt, sizeof(u32)); - - max[c->cnts[0] >> 5U] = 1U << (c->cnts[0] & 0b11111U); - - u32 state[3]; - - state[0] = 2; - state[1] = state[2] = 0; - - - u32 mtr = 0; - while (state[0] < 3U) { - u32 cmd = state[0]; - ctrthings2(c, state, ctr, max); - if (state[0] != 2 && cmd == 2) { - //printf("\n"); - //for (unsigned i = wcnt - 1; i < wcnt; --i) printbits(ctr[i]); - //printf("\n"); - //mtr++; - //if (mtr == 10) exit(15); - // printf("%u %u %u\n", state[0], state[1], state[2]); - } - } - + freecnf(c); + return 0; + */ + runTests(); return 0; /* diff --git a/ncnf.c b/ncnf.c index 37b88b4..e9e8f05 100644 --- a/ncnf.c +++ b/ncnf.c @@ -127,6 +127,7 @@ cnf* readDIMACS(char* path) { // Realloc the arrays to exactly match the number of literals c->variables = realloc(c->variables, sizeof(u32) * cnt); c->parities = realloc(c->parities, sizeof(u8) * cnt); + c->cnts[2] = cnt; free(buf); if (fclose(f)) { printf("Failed to close file\n"); diff --git a/ncnf.h b/ncnf.h index c61d80f..6e84f26 100644 --- a/ncnf.h +++ b/ncnf.h @@ -10,7 +10,7 @@ } typedef struct { - u32 cnts[2]; // { varcnt, clausecnt } + u32 cnts[3]; // { varcnt, clausecnt } u32* clausedat; // { ind, len, jval } u32* variables; u8* parities; diff --git a/psat.cl b/psat.cl index c29dcff..13fd003 100644 --- a/psat.cl +++ b/psat.cl @@ -32,120 +32,122 @@ static inline void stateaddpow(uint wcnt, uint* state, uint pow) { } } -__kernel void vectorSAT(__global const uint* cnfheader, __global const uint* lvars, __global const uint* vars, __global const uint* clauses, __global const uchar* pars, __global uint* output, __global uchar* scratchpad, __local uint* maxvals) { - output[0] = 2; +void mul(uint* c, uint len, uint* a, uint b) { + uint carry = 0; + for (uint i = 0; i < len; ++i) { + uint ncarry; + uint blo = a[i] & 0xFFFFU; + uint bhi = a[i] >> 16U; + uint ilo = b & 0xFFFFU; + uint ihi = b >> 16U; - __local uint setmax; + *(c + i) = ilo * blo; + uint b1 = ilo * bhi; + uint c1 = ihi * blo; + ncarry = ihi * bhi; - uint cnt = cnfheader[0]; - uint vcnt = cnfheader[1]; - uint ccnt = cnfheader[2]; - - uint wcnt = 1 + (vcnt >> 5U); - - uint maxctr = 1U << (vcnt & 0b11111U); - - //uint glbid = get_global_id(0); - //uint glbsz = get_global_size(0); + b1 += c1; + ncarry += (b1 < c1) << 16U; + uint bblo = b1 & 0xFFFFU; + uint bbhi = b1 >> 16U; + bblo <<= 16U; + *(c + i) += bblo; + uchar acarry = *(c + i) < bblo; + ncarry += bbhi + acarry; + c[i] += carry; + ncarry += c[i] < carry; + carry = ncarry; + } +} +__kernel void vectorSAT(__global const uint* cnfhdr, __global const uint* clausedat, __global const uint* vars, __global const uchar* pars, __global uint* output, __local uint* lctrs) { uint locid = get_local_id(0); uint locsz = get_local_size(0); // uint grpid = get_group_id(0); // uint grpcn = get_num_groups(0); + uint globid = get_global_id(0); + uint globsz = get_global_size(0); - // Zero out the counter - for (uint i = 0; i < wcnt; ++i) output[i + 1] = 0; + uint wcnt = 1U + (cnfhdr[0] >> 5U); - // Set all scratchpad clauses to true - for (uint j = 0; j < ccnt; j += locsz) { - uchar cond = (j + locid) < ccnt; - j = j * cond + (!cond) * (ccnt - locid - 1); - scratchpad[j + locid] = 1; + uint mode = 2; + uint index = 0; + uint addval = 0; + + + if (globid == 0) output[0] = 0; + + uint* ctr = lctrs + wcnt * 2 * locid; + uint* max = lctrs + wcnt * (2 * locid + 1); + + for (uint i = 0; i < wcnt; ++i) { + ctr[i] = max[i] = 0; } - __local uint firstind[1]; - while (output[0] == 2) { - firstind[0] = ccnt; - - setmax = 0; - uint maxnumx = 0; - - for (uint j = 0; j < cnt; j += locsz) { - - uchar cond = (j + locid) < cnt; - // Last element cap - j = j * cond + (!cond) * (cnt - locid - 1); - uint varind = vars[j + locid]; - varind = (vcnt - 1) - varind; - uint iind = varind >> 5U; - uint bind = varind & 0b11111U; - uchar cpar = (output[iind + 1] >> bind) & 1U; - - if (cpar != pars[j + locid]) { - scratchpad[clauses[j + locid]] = 0; - } - } - - barrier(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE); + mul(ctr, wcnt, output + 1, globid); + if (globid == globsz) { + stateaddpow(wcnt, max, cnfhdr[0]); + } else { + mul(max, wcnt, output + 1, globid + 1); + } - for (uint j = 0; j < ccnt; j += locsz) { - if (scratchpad[j + locid] == 1 && (j + locid) < ccnt) { - setmax = 1; - } - } + barrier(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE); - barrier(CLK_LOCAL_MEM_FENCE); + uint varcnt = cnfhdr[0] - 1; + while (output[0] == 0) { - if (setmax) { - // Set maxval array to zero - maxvals[locid] = 0; + uint chkmsk = 0xFFFFFFFFU * (mode == 2U); + uint chkcls = index & chkmsk; + uint chkind = clausedat[3 * chkcls] + (addval & chkmsk); + uint var = vars[chkind]; + uchar par = pars[chkind]; + uint vword = (varcnt - var) >> 5U; + uint vbit = (varcnt - var) & 0b11111U; + uchar corpar = (ctr[vword] >> vbit) & 1U; + uchar isvalid = (par == corpar); + uchar islvar = ((addval + 1) == clausedat[3 * chkcls + 1]); + uchar isbchk0 = (mode == 2U); + uchar isbchk1 = isbchk0 & isvalid; + uchar isbchk2 = isbchk1 & islvar; + uint j = clausedat[3 * chkcls + 2]; + mode -= 2 * isbchk2; + // if (isbchk2) printf("j: %u\n", j); + index = (j >> 5U) * isbchk2 + index * (!isbchk2); + addval = (1U << (j & 0b11111U)) * isbchk2 + addval * (!isbchk2); + addval += ((isbchk1) & (!islvar)); + uchar isbchk3 = (isbchk0 & (!isvalid)); + addval *= (!isbchk3); + index += (isbchk3); + uchar issat = (index == cnfhdr[1]) * (isbchk3); - // Accumulate and reduce the maximums - for (uint j = 0; j < ccnt; j += locsz) { - //uint a = maxvals[locid]; - //uint b = lvars[j + locid]; - // uint c = max(a, b); - if ((j + locid) < ccnt && scratchpad[j + locid] == 1) { - //maxvals[locid] = c; - atomic_min(firstind, (j + locid)); + uint cmpaddind = index * (mode != 2U); + uint nval = ctr[cmpaddind] + addval; // Find the result of the current step if it was addition + addval = (nval < ctr[cmpaddind]) * (mode == 0) + (addval) * (mode == 2U); // If in add mode, set addval to carry. If in cmp mode, set to 0. If in check mode, leave alone. + ctr[cmpaddind] = nval * ((mode == 0) & !issat) + ctr[cmpaddind] * ((mode != 0) | issat); // If in add mode, set new ctr val, otherwise leave unchanged + addval -= (ctr[cmpaddind] < max[cmpaddind]) * (mode == 1); // If in comparison mode, decrement addval if less than + addval += (ctr[cmpaddind] > max[cmpaddind]) * (mode == 1); // If in comparison mode, increment addval if greater than + uchar addcond = (addval == 0) | (cmpaddind == (wcnt - 1)); // Exit condition for the ADD state: If addval is zero (no carry) or we're at the last word + uchar cmpcond = (addval != 0) | (cmpaddind == 0); // Exit condition for the CMP state: if addval is nonzero (lt or gt) or we're at the least significant word + uchar exittime = (mode == 1) & cmpcond & (addval != -1); + exittime |= issat; + if (exittime) { // If in cmpmode and the comparison result is not less than, unsat + if (issat) { + if (atomic_cmpxchg(output, 0, 1) == 0) { + for (uint i = 0; i < wcnt; ++i) { + output[i + 1] = ~ctr[i]; + } + output[0] = 1; } } - - barrier(CLK_LOCAL_MEM_FENCE); - - uint maxj = lvars[firstind[0]]; - - // Set all scratchpad clauses to true - for (uint j = 0; j < ccnt; j += locsz) { - uchar cond = (j + locid) < ccnt; - j = j * cond + (!cond) * (ccnt - locid - 1); - scratchpad[j + locid] = 1; - } - - // Final reduction pass - /* - uint maxj = maxvals[0]; - for (uint j = 1; j < locsz; ++j) { - maxj = max(maxj, maxvals[j]); - } - */ - - // Add to the counter - if (locid == 0) { - stateaddpow(wcnt, output + 1, maxj); - } - - if (output[wcnt] >= maxctr) { - output[0] = 1; - } - } else { - output[0] = 0; - if (locid == 0) { - for (uint i = 0; i < wcnt; ++i) output[i + 1] = ~output[i + 1]; - } + return; } - barrier(CLK_LOCAL_MEM_FENCE); + uchar cmpdone = cmpcond & (mode == 1); // if comparison completion conditions are satisfied and in CMP mode + uint addindex = (cmpaddind + 1) * !addcond + (wcnt - 1) * addcond; // if add completion is satisfied, set index to most significant word, else increment by 1 + index = addindex * (mode == 0) + (index - (mode == 1)) * (mode != 0); // If in add mode, use addindex; if in cmp mode, decrement index by 1 + index *= !cmpdone; + addval *= !(((addcond) & (mode == 0)) | cmpdone); // If add is complete, or cmp is complete, zero. Else leave unchanged. + mode += addcond * (mode == 0) + cmpdone; // If in add mode and add completion is reached, increment mode. If in cmp mode and cmp completion reached, increment mode. } } \ No newline at end of file diff --git a/tests/masterTest.c b/tests/masterTest.c index 471d369..db87a35 100644 --- a/tests/masterTest.c +++ b/tests/masterTest.c @@ -24,7 +24,7 @@ i32 runTests() { i32 runuf20() { - printf("Running against uf20\n"); + // printf("Running against uf20\n"); u32 passed = 0; u64 tottime = 0; for (u32 i = 0; i < 1000; ++i) { @@ -33,62 +33,7 @@ i32 runuf20() { cnf* c = readDIMACS(buf); - // TODO: Uncomment - // sortlastnum(c, c->litcnt); - - u64 start = utime(); - i32 res = gpusolve(c); - u64 stop = utime(); - tottime += (stop - start); - - freecnf(c); - if (res == 0) passed++; - } - printf("Passed %u / 1000 tests\n", passed); - printf("Took %lf s total, %lf s on avg\n", ((f64) tottime) / 1000000.0, ((f64) tottime) / 1000000000.0); - if (passed == 1000) return 0; - return 1; -} - -i32 runuf50() { - printf("Running against uf50\n"); - u32 passed = 0; - u64 tottime = 0; - for (u32 i = 0; i < 1000; ++i) { - char buf[128]; - i32 len = sprintf(buf, "/home/lev/Downloads/uf50/uf50-0%u.cnf", i + 1); - - cnf* c = readDIMACS(buf); - - // TODO: Uncomment - // sortlastnum(c, c->litcnt); - - u64 start = utime(); - i32 res = gpusolve(c); - u64 stop = utime(); - tottime += (stop - start); - - freecnf(c); - if (res == 0) passed++; - } - printf("Passed %u / 1000 tests\n", passed); - printf("Took %lf s total, %lf s on avg\n", ((f64) tottime) / 1000000.0, ((f64) tottime) / 1000000000.0); - if (passed == 1000) return 0; - return 1; -} - -i32 runuuf50() { - printf("Running against uuf50\n"); - u32 passed = 0; - u64 tottime = 0; - for (u32 i = 0; i < 1000; ++i) { - char buf[128]; - i32 len = sprintf(buf, "/home/lev/Downloads/uuf50/uuf50-0%u.cnf", i + 1); - - cnf* c = readDIMACS(buf); - - // TODO: Uncomment - // sortlastnum(c, c->litcnt); + sortlastnum(c); u64 start = utime(); i32 res = gpusolve(c); @@ -98,8 +43,60 @@ i32 runuuf50() { freecnf(c); if (res == 1) passed++; } - printf("Passed %u / 1000 tests\n", passed); - printf("Took %lf s total, %lf s on avg\n", ((f64) tottime) / 1000000.0, ((f64) tottime) / 1000000000.0); + // printf("Passed %u / 1000 tests\n", passed); + // printf("Took %lf s total, %lf s on avg\n", ((f64) tottime) / 1000000.0, ((f64) tottime) / 1000000000.0); + if (passed == 1000) return 0; + return 1; +} + +i32 runuf50() { + // printf("Running against uf50\n"); + u32 passed = 0; + u64 tottime = 0; + for (u32 i = 0; i < 1000; ++i) { + char buf[128]; + i32 len = sprintf(buf, "/home/lev/Downloads/uf50/uf50-0%u.cnf", i + 1); + + cnf* c = readDIMACS(buf); + + sortlastnum(c); + + u64 start = utime(); + i32 res = gpusolve(c); + u64 stop = utime(); + tottime += (stop - start); + + freecnf(c); + if (res == 1) passed++; + } + // printf("Passed %u / 1000 tests\n", passed); + // printf("Took %lf s total, %lf s on avg\n", ((f64) tottime) / 1000000.0, ((f64) tottime) / 1000000000.0); + if (passed == 1000) return 0; + return 1; +} + +i32 runuuf50() { + // printf("Running against uuf50\n"); + u32 passed = 0; + u64 tottime = 0; + for (u32 i = 0; i < 1000; ++i) { + char buf[128]; + i32 len = sprintf(buf, "/home/lev/Downloads/uuf50/uuf50-0%u.cnf", i + 1); + + cnf* c = readDIMACS(buf); + + sortlastnum(c); + + u64 start = utime(); + i32 res = gpusolve(c); + u64 stop = utime(); + tottime += (stop - start); + + freecnf(c); + if (res == 0) passed++; + } + // printf("Passed %u / 1000 tests\n", passed); + // printf("Took %lf s total, %lf s on avg\n", ((f64) tottime) / 1000000.0, ((f64) tottime) / 1000000000.0); if (passed == 1000) return 0; return 1; } \ No newline at end of file