2 Commits

Author SHA1 Message Date
gothictomato
a0e1d4f5b4 Added new CNF format
Signed-off-by: gothictomato <gothictomato@pm.me>
2022-07-24 17:47:31 -04:00
gothictomato
8e34675408 Initial progress on subsumption DFA. Why is this branch called subsumption-tree? 2022-07-21 10:56:05 -04:00
14 changed files with 665 additions and 719 deletions

View File

@@ -2,8 +2,8 @@ cmake_minimum_required(VERSION 3.22)
project(psat C) project(psat C)
set(CMAKE_C_STANDARD 99) set(CMAKE_C_STANDARD 99)
# set(CMAKE_C_FLAGS "-mavx2 -O3 -ftree-loop-linear -ftree-loop-im -ftree-loop-ivcanon -fivopts -ftree-vectorize -ftracer -funroll-all-loops ") set(CMAKE_C_FLAGS "-g")
add_executable(psat main.c cnf.c cnf.h time.c time.h types.h gpusolver.c gpusolver.h cpusolver.c cpusolver.h tests/masterTest.c tests/masterTest.h ncnf.c ncnf.h rng.h rng.c) add_executable(psat main.c cnf.c cnf.h time.c time.h types.h gpusolver.c gpusolver.h cpusolver.c cpusolver.h tests/masterTest.c tests/masterTest.h subtree.c subtree.h ncnf.c ncnf.h)
target_link_libraries(psat -lOpenCL -lgmp) target_link_libraries(psat -lOpenCL)

2
cnf.c
View File

@@ -1,6 +1,5 @@
#include "cnf.h" #include "cnf.h"
/*
cnf* readDIMACS(char* path) { cnf* readDIMACS(char* path) {
cnf* c = malloc(sizeof(cnf)); cnf* c = malloc(sizeof(cnf));
CHECK(c, "Failed to alloc CNF struct\n"); CHECK(c, "Failed to alloc CNF struct\n");
@@ -326,4 +325,3 @@ void sortlastnum(cnf* c, u64 N) {
free(swaparr); free(swaparr);
free(d); free(d);
} }
*/

4
cnf.h
View File

@@ -3,8 +3,6 @@
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
#include <stdio.h> #include <stdio.h>
/*
#define CHECK(X, Y) if (X == NULL) { \ #define CHECK(X, Y) if (X == NULL) { \
printf(Y); \ printf(Y); \
@@ -22,7 +20,6 @@ typedef struct {
u8* pars; u8* pars;
} cnf; } cnf;
cnf* readDIMACS(char* path); cnf* readDIMACS(char* path);
void printcnf(cnf* c); void printcnf(cnf* c);
@@ -32,4 +29,3 @@ void sortlastnum(cnf* c, u64 N);
void freecnf(cnf* c); void freecnf(cnf* c);
*/

View File

@@ -1,11 +1,9 @@
#include "gpusolver.h" #include "gpusolver.h"
#include <CL/cl.h> #include <CL/cl.h>
#include "time.h" #include "time.h"
#include "gmp.h"
#define GLOBAL_SIZE (256)
#define LOCAL_SIZE (128) #define LOCAL_SIZE (GLOBAL_SIZE)
#define GLOBAL_SIZE (1024)
#define CHECKASGN (true) #define CHECKASGN (true)
@@ -30,7 +28,7 @@ i32 gpusolve(cnf* c) {
source_size = fread( source_str, 1, 0x100000, fp); source_size = fread( source_str, 1, 0x100000, fp);
fclose( fp ); fclose( fp );
u32 wordcnt = 1 + ((c->cnts[0]) >> 5U); u32 wordcnt = 1 + ((c->varcnt) >> 5U);
u32* solution = calloc((wordcnt + 1), sizeof(u32)); u32* solution = calloc((wordcnt + 1), sizeof(u32));
if (solution == NULL) { if (solution == NULL) {
@@ -38,13 +36,6 @@ i32 gpusolve(cnf* c) {
exit(1); exit(1);
} }
mpz_t gmpmax;
mpz_init(gmpmax);
mpz_ui_pow_ui(gmpmax, 2, c->cnts[0]);
mpz_div_ui(gmpmax, gmpmax, GLOBAL_SIZE);
mpz_export(solution + 1, NULL, -1, sizeof(u32), 0, 0, gmpmax);
mpz_clear(gmpmax);
cl_int res = clGetPlatformIDs(1, &platformid, &numplatforms); cl_int res = clGetPlatformIDs(1, &platformid, &numplatforms);
if (res != CL_SUCCESS) { if (res != CL_SUCCESS) {
printf("Failed to retrieve OpenCL platform IDs\n"); printf("Failed to retrieve OpenCL platform IDs\n");
@@ -83,27 +74,38 @@ i32 gpusolve(cnf* c) {
*/ */
// TODO: Look into DMA, maybe? Could do clause learning CPU-side and just update the GPU buffer // TODO: Look into DMA, maybe? Could do clause learning CPU-side and just update the GPU buffer
cl_mem gpuheader = clCreateBuffer(context, CL_MEM_READ_ONLY, 2 * sizeof(cl_uint), NULL, &res); cl_mem gpuheader = clCreateBuffer(context, CL_MEM_READ_ONLY, 3 * sizeof(cl_uint), NULL, &res);
if (res != CL_SUCCESS) { if (res != CL_SUCCESS) {
printf("Failed to create CNF header buffer\n"); printf("Failed to create CNF header buffer\n");
exit(1); exit(1);
} }
cl_mem gpulvars = clCreateBuffer(context, CL_MEM_READ_ONLY, 3 * c->cnts[1] * sizeof(cl_uint), NULL, &res); cl_mem gpulvars = clCreateBuffer(context, CL_MEM_READ_ONLY, c->clausecnt * sizeof(cl_uint), NULL, &res);
if (res != CL_SUCCESS) { if (res != CL_SUCCESS) {
printf("Failed to create CNF lvar buffer\n"); printf("Failed to create CNF lvar buffer\n");
exit(1); exit(1);
} }
cl_mem gpuvariables = clCreateBuffer(context, CL_MEM_READ_ONLY, c->cnts[2] * sizeof(cl_uint), NULL, &res); cl_mem gpuvariables = clCreateBuffer(context, CL_MEM_READ_ONLY, c->litcnt * sizeof(cl_uint), NULL, &res);
if (res != CL_SUCCESS) { if (res != CL_SUCCESS) {
printf("Failed to create CNF variable buffer\n"); printf("Failed to create CNF variable buffer\n");
exit(1); exit(1);
} }
cl_mem gpuparities = clCreateBuffer(context, CL_MEM_READ_ONLY, c->cnts[2] * sizeof(cl_uchar), NULL, &res); cl_mem gpuclauses = clCreateBuffer(context, CL_MEM_READ_ONLY, c->litcnt * sizeof(cl_uint), NULL, &res);
if (res != CL_SUCCESS) {
printf("Failed to create CNF clause buffer\n");
exit(1);
}
cl_mem gpuparities = clCreateBuffer(context, CL_MEM_READ_ONLY, c->litcnt * sizeof(cl_uchar), NULL, &res);
if (res != CL_SUCCESS) { if (res != CL_SUCCESS) {
printf("Failed to create CNF parity buffer\n"); printf("Failed to create CNF parity buffer\n");
exit(1); exit(1);
} }
// Allocate scratchpad memory
cl_mem gpuscratchpad = clCreateBuffer(context, CL_MEM_READ_WRITE, c->clausecnt * sizeof(cl_uchar), NULL, &res);
if (res != CL_SUCCESS) {
printf("Failed to create CNF subsumption scratchpad buffer\n");
exit(1);
}
/* /*
cl_mem gpumaxvals = clCreateBuffer(context, CL_MEM_READ_WRITE, GLOBAL_SIZE * sizeof(cl_uint), NULL, &res); cl_mem gpumaxvals = clCreateBuffer(context, CL_MEM_READ_WRITE, GLOBAL_SIZE * sizeof(cl_uint), NULL, &res);
if (res != CL_SUCCESS) { if (res != CL_SUCCESS) {
@@ -118,29 +120,30 @@ i32 gpusolve(cnf* c) {
exit(1); exit(1);
} }
u32 cnfheader[3] = { c->litcnt, c->varcnt, c->clausecnt };
// Load buffers to GPU // Load buffers to GPU
res = clEnqueueWriteBuffer(commqueue, gpuheader, CL_TRUE, 0, 2 * sizeof(cl_uint), c->cnts, 0, NULL, NULL); res = clEnqueueWriteBuffer(commqueue, gpuheader, CL_TRUE, 0, 3 * sizeof(cl_uint), cnfheader, 0, NULL, NULL);
if (res != CL_SUCCESS) { if (res != CL_SUCCESS) {
printf("Failed to queue CNF header write\n"); printf("Failed to queue CNF header write\n");
exit(1); exit(1);
} }
res = clEnqueueWriteBuffer(commqueue, gpulvars, CL_TRUE, 0, 3 * c->cnts[1] * sizeof(cl_uint), c->clausedat, 0, NULL, NULL); res = clEnqueueWriteBuffer(commqueue, gpulvars, CL_TRUE, 0, c->clausecnt * sizeof(cl_uint), c->lastvars, 0, NULL, NULL);
if (res != CL_SUCCESS) { if (res != CL_SUCCESS) {
printf("Failed to queue CNF lvar write\n"); printf("Failed to queue CNF lvar write\n");
exit(1); exit(1);
} }
res = clEnqueueWriteBuffer(commqueue, gpuvariables, CL_TRUE, 0, c->cnts[2] * sizeof(cl_uint), c->variables, 0, NULL, NULL); res = clEnqueueWriteBuffer(commqueue, gpuvariables, CL_TRUE, 0, c->litcnt * sizeof(cl_uint), c->variables, 0, NULL, NULL);
if (res != CL_SUCCESS) { if (res != CL_SUCCESS) {
printf("Failed to queue CNF variable write\n"); printf("Failed to queue CNF variable write\n");
exit(1); exit(1);
} }
res = clEnqueueWriteBuffer(commqueue, gpuparities, CL_TRUE, 0, c->cnts[2] * sizeof(cl_uchar), c->parities, 0, NULL, NULL); res = clEnqueueWriteBuffer(commqueue, gpuclauses, CL_TRUE, 0, c->litcnt * sizeof(cl_uint), c->clauses, 0, NULL, NULL);
if (res != CL_SUCCESS) { if (res != CL_SUCCESS) {
printf("Failed to queue CNF parity write\n"); printf("Failed to queue CNF clause write\n");
exit(1); exit(1);
} }
res = clEnqueueWriteBuffer(commqueue, gpuparities, CL_TRUE, 0, c->litcnt * sizeof(cl_uchar), c->pars, 0, NULL, NULL);
res = clEnqueueWriteBuffer(commqueue, gpuoutput, CL_TRUE, 0, (wordcnt + 1) * sizeof(cl_uint), solution, 0, NULL, NULL);
if (res != CL_SUCCESS) { if (res != CL_SUCCESS) {
printf("Failed to queue CNF parity write\n"); printf("Failed to queue CNF parity write\n");
exit(1); exit(1);
@@ -173,19 +176,16 @@ i32 gpusolve(cnf* c) {
exit(1); exit(1);
} }
size_t maxworkgrpu = 0;
res = clGetKernelWorkGroupInfo(kernel, deviceid, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &maxworkgrpu, NULL);
// printf("Max work group size: %lu\n", maxworkgrpu);
res = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void*) &gpuheader); res = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void*) &gpuheader);
res = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void*) &gpulvars); res = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void*) &gpulvars);
res = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void*) &gpuvariables); res = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void*) &gpuvariables);
res = clSetKernelArg(kernel, 3, sizeof(cl_mem), (void*) &gpuparities); res = clSetKernelArg(kernel, 3, sizeof(cl_mem), (void*) &gpuclauses);
res = clSetKernelArg(kernel, 4, sizeof(cl_mem), (void*) &gpuparities);
res = clSetKernelArg(kernel, 4, sizeof(cl_mem), (void*) &gpuoutput); res = clSetKernelArg(kernel, 5, sizeof(cl_mem), (void*) &gpuoutput);
res = clSetKernelArg(kernel, 5, 2 * wordcnt * sizeof(cl_uint) * LOCAL_SIZE, NULL); res = clSetKernelArg(kernel, 6, sizeof(cl_mem), (void*) &gpuscratchpad);
res = clSetKernelArg(kernel, 7, LOCAL_SIZE * sizeof(cl_uint), NULL);
// u64 starttime = utime(); // u64 starttime = utime();
size_t itemsize[2] = {GLOBAL_SIZE, LOCAL_SIZE }; size_t itemsize[2] = {GLOBAL_SIZE, LOCAL_SIZE };
@@ -202,39 +202,34 @@ i32 gpusolve(cnf* c) {
} }
// u64 endtime = utime(); // u64 endtime = utime();
if (solution[0] == 0) { if (solution[0] == 1) {
printf("UNSAT\n"); printf("UNSAT\n");
} else if (solution[0] == 1) { } else if (solution[0] == 0) {
printf("SAT: "); printf("SAT\n");
for (u32 k = 0; k < c->cnts[0]; ++k) { for (u32 k = 0; k < c->varcnt; ++k) {
u32 vind = (c->cnts[0] - 1) - k; u32 vind = (c->varcnt - 1) - k;
u32 iind = vind >> 5U; u32 iind = vind >> 5U;
u32 bind = vind & 0b11111U; u32 bind = vind & 0b11111U;
u8 par = (solution[iind + 1] >> bind) & 1U; u8 par = (solution[iind + 1] >> bind) & 1U;
printf("%u", par); printf("%u", par);
} }
printf("\n");
if (CHECKASGN) { if (CHECKASGN) {
u8 checkres = 0; u8* assigncheck = calloc(c->clausecnt, sizeof(u8));
for (u32 i = 0; i < c->cnts[1]; ++i) { for (u32 i = 0; i < c->litcnt; ++i) {
checkres = 0; u32 g = ((c->varcnt - 1) - c->variables[i]) >> 5U;
for (u32 j = 0; j < c->clausedat[3 * i + 1]; ++j) { u32 h = ((c->varcnt - 1) - c->variables[i]) & 0b11111U;
u32 v = c->variables[c->clausedat[3 * i] + j]; u8 paract = (solution[g + 1] >> h) & 1U;
u32 vv = c->cnts[0] - 1; if (c->pars[i] == paract) assigncheck[c->clauses[i]] = true;
u32 g = (vv - v) >> 5U; }
u32 h = (vv - v) & 0b11111U; for (u32 i = 0; i < c->clausecnt; ++i) {
u8 paract = (solution[g + 1] >> h) & 1U; if (!assigncheck[i]) {
if (c->parities[c->clausedat[3 * i] + j] == paract) { printf("Failed assignment check\n");
checkres = 1; solution[0] = 4;
break;
}
} }
if (!checkres) break;
}
if (checkres) {
printf(" \xE2\x9C\x93\n");
} else {
printf(" -\n");
} }
free(assigncheck);
printf("Passed assignment check\n");
} }
} else { } else {
printf("What the fuck???\n"); printf("What the fuck???\n");
@@ -253,12 +248,14 @@ i32 gpusolve(cnf* c) {
res = clReleaseMemObject(gpuheader); res = clReleaseMemObject(gpuheader);
res = clReleaseMemObject(gpulvars); res = clReleaseMemObject(gpulvars);
res = clReleaseMemObject(gpuvariables); res = clReleaseMemObject(gpuvariables);
res = clReleaseMemObject(gpuclauses);
res = clReleaseMemObject(gpuparities); res = clReleaseMemObject(gpuparities);
res = clReleaseMemObject(gpuoutput); res = clReleaseMemObject(gpuoutput);
res = clReleaseMemObject(gpuscratchpad);
res = clReleaseDevice(deviceid); res = clReleaseDevice(deviceid);
i32 retval = (i32) solution[0]; i32 retval = solution[0];
free(solution); free(solution);
free(source_str); free(source_str);

View File

@@ -1,4 +1,4 @@
#pragma once #pragma once
#include "ncnf.h" #include "cnf.h"
i32 gpusolve(cnf* c); i32 gpusolve(cnf* c);

250
main.c
View File

@@ -1,228 +1,21 @@
#include <stdio.h> #include <stdio.h>
#include "cnf.h"
#include "gpusolver.h" #include "gpusolver.h"
#include "time.h" #include "time.h"
#include "tests/masterTest.h" #include "tests/masterTest.h"
#include "gmp.h"
#include "rng.h"
#include "ncnf.h" #include "ncnf.h"
#define ADD (0)
#define CMP (1)
#define CHK (2)
void ctrthings2(cnf* c, u32* state, u32* ctr, u32* max) {
u32 wcnt = 1U + (c->cnts[0] >> 5U);
u32* mode = state;
u32* index = state + 1;
u32* addval = state + 2;
u32 varcnt = c->cnts[0] - 1;
u32 chkmsk = 0xFFFFFFFFU * (*mode == CHK);
u32 chkcls = *index & chkmsk;
u32 chkind = c->clausedat[3 * chkcls] + (*addval & chkmsk);
u32 var = c->variables[chkind];
u8 par = c->parities[chkind];
u32 vword = (varcnt - var) >> 5U;
u32 vbit = (varcnt - var) & 0b11111U;
u8 corpar = (ctr[vword] >> vbit) & 1U;
u8 isvalid = (par == corpar);
u8 islvar = ((*addval + 1) == c->clausedat[3 * chkcls + 1]);
u8 isbchk0 = (*mode == CHK);
u8 isbchk1 = isbchk0 & isvalid;
u8 isbchk2 = isbchk1 & islvar;
u32 j = c->clausedat[3 * chkcls + 2];
*mode -= 2 * isbchk2;
*index = (j >> 5U) * isbchk2 + *index * (!isbchk2);
*addval = (1U << (j & 0b11111U)) * isbchk2 + *addval * (!isbchk2);
*addval += ((isbchk1) & (!islvar));
u8 isbchk3 = (isbchk0 & (!isvalid));
*addval *= (!isbchk3);
*index += (isbchk3);
u8 issat = (*index == c->cnts[1]) * (isbchk3);
u32 cmpaddind = *index * (*mode != CHK);
u32 nval = ctr[cmpaddind] + *addval; // Find the result of the current step if it was addition
*addval = (nval < ctr[cmpaddind]) * (*mode == ADD) + (*addval) * (*mode == CHK); // If in add mode, set addval to carry. If in cmp mode, set to 0. If in check mode, leave alone.
ctr[cmpaddind] = nval * ((*mode == ADD) & !issat) + ctr[cmpaddind] * ((*mode != ADD) | issat); // If in add mode, set new ctr val, otherwise leave unchanged
*addval -= (ctr[cmpaddind] < max[cmpaddind]) * (*mode == CMP); // If in comparison mode, decrement addval if less than
*addval += (ctr[cmpaddind] > max[cmpaddind]) * (*mode == CMP); // If in comparison mode, increment addval if greater than
u8 addcond = (*addval == 0) | (cmpaddind == (wcnt - 1)); // Exit condition for the ADD state: If addval is zero (no carry) or we're at the last word
u8 cmpcond = (*addval != 0) | (cmpaddind == 0); // Exit condition for the CMP state: if addval is nonzero (lt or gt) or we're at the least significant word
u8 exittime = (*mode == CMP) & cmpcond & (*addval != -1);
exittime |= issat;
if (exittime) { // If in cmpmode and the comparison result is not less than, unsat
printf("Result: %u\n", issat);
*mode = 4;
return;
}
u8 cmpdone = cmpcond & (*mode == CMP); // if comparison completion conditions are satisfied and in CMP mode
u32 addindex = (cmpaddind + 1) * !addcond + (wcnt - 1) * addcond; // if add completion is satisfied, set index to most significant word, else increment by 1
*index = addindex * (*mode == ADD) + (*index - (*mode == CMP)) * (*mode != ADD); // If in add mode, use addindex; if in cmp mode, decrement index by 1
*index *= !cmpdone;
*addval *= !(((addcond) & (*mode == ADD)) | cmpdone); // If add is complete, or cmp is complete, zero. Else leave unchanged.
*mode += addcond * (*mode == ADD) + cmpdone; // If in add mode and add completion is reached, increment mode. If in cmp mode and cmp completion reached, increment mode.
}
void printbits(unsigned a) {
for (unsigned i = 0; i < 32; ++i) {
unsigned ind = 31 - i;
printf("%u", (a >> ind) & 1U);
}
}
#define TESTS (274877906944LU >> 10U)
#define CSZE (83LU)
#define eqprob (0.01f)
void mul(u32* c, u32 len, u32* a, u32 b) {
u32 carry = 0;
for (u32 i = 0; i < len; ++i) {
u32 ncarry;
u32 blo = a[i] & 0xFFFFU;
u32 bhi = a[i] >> 16U;
u32 ilo = b & 0xFFFFU;
u32 ihi = b >> 16U;
*(c + i) = ilo * blo;
u32 b1 = ilo * bhi;
u32 c1 = ihi * blo;
ncarry = ihi * bhi;
b1 += c1;
ncarry += (b1 < c1) << 16U;
u32 bblo = b1 & 0xFFFFU;
u32 bbhi = b1 >> 16U;
bblo <<= 16U;
*(c + i) += bblo;
u8 acarry = *(c + i) < bblo;
ncarry += bbhi + acarry;
c[i] += carry;
ncarry += c[i] < carry;
carry = ncarry;
}
}
int main() { int main() {
/* ncnf* c2 = nreadDIMACS("/home/lev/Downloads/uf20/uf20-02.cnf");
printf("Tests: %lu\n", TESTS); nprintcnf(c2);
rngstate rng;
u64 rseed = utime();
seed(&rng, rseed);
printf("Seed: %lu\n", rseed);
mpz_t a, b, c, d, e;
mpz_inits(a, b, c, d, e, NULL);
u32 ctrp[CSZE];
u32 maxp[CSZE];
u32* ctr = ctrp;
u32* max = maxp;
u32 state[3];
u32 hdr[2];
char buf[4096];
for (u64 i = 0; i < TESTS; ++i) {
memset(ctr, 0, sizeof(u32) * CSZE);
memset(max, 0, sizeof(u32) * CSZE);
//u32 lenval = ru32(&rng) % CSZE;
u32 lenval = CSZE;
if (lenval < 2) lenval = 2;
u32 jval = ru32(&rng) % ((lenval - 1) * 32);
for (u32 j = 0; j < lenval - 2; ++j) {
ctr[j] = ru32(&rng);
max[j] = ru32(&rng);
}
mpz_import(a, lenval, -1, sizeof(u32), 0, 0, ctr);
mpz_import(b, lenval, -1, sizeof(u32), 0, 0, max);
state[1] = jval >> 5U;
state[2] = jval & 0b11111U;
state[2] = 1U << state[2];
mpz_ui_pow_ui(c, 2, jval);
if (rf32(&rng) < eqprob && mpz_cmp) {
mpz_sub(a, b, c);
if (mpz_sgn(a) != -1) {
mpz_export(ctr, NULL, -1, sizeof(u32), 0, 0, a);
} else {
mpz_import(a, lenval, -1, sizeof(u32), 0, 0, ctr);
}
}
mpz_add(d, a, c);
state[0] = 0;
while (state[0] < 2U) {
ctrthings2(hdr, lenval, state, ctr, max);
}
mpz_import(c, lenval, -1, sizeof(u32), 0, 0, ctr);
i32 res = mpz_cmp(d, b);
if (res == -1) {
if (state[2] != -1) {
printf("Fuck2 %lu\n", i);
printf("d: ");
mpz_out_str(stdout, 10, d);
printf("\nb: ");
mpz_out_str(stdout, 10, b);
printf("\n");
printf("mode: %u\n", state[2]);
exit(0);
}
} else {
if (state[2] != 2) {
printf("Fuck3 %lu\n", i);
printf("d: ");
mpz_out_str(stdout, 10, d);
printf("\nc: ");
mpz_out_str(stdout, 10, c);
printf("\nb: ");
mpz_out_str(stdout, 10, b);
printf("\n");
printf("mode: %u\n", state[2]);
exit(0);
}
}
res = mpz_cmp(d, c);
if (res != 0) {
printf("Fuck %lu\na: ", i);
mpz_out_str(stdout, 10, a);
printf("\nd: ");
mpz_out_str(stdout, 10, d);
printf("\nc: ");
mpz_out_str(stdout, 10, c);
printf("\n%u %u\n", lenval, jval);
exit(0);
}
}
mpz_clears(a, b, c, d, e, NULL);
*/
printf("\n\n\n\n");
nprintcnf2(c2);
nsort(c2);
printf("\n\n\n\n");
nprintcnf2(c2);
printf("the balcony is to your left\n");
return 0;
/* /*
srand( utime()); srand( utime());
u32 cnt = 4096; u32 cnt = 4096;
@@ -246,25 +39,7 @@ int main() {
*/ */
/* Expects a path to a DIMACS file */ /* Expects a path to a DIMACS file */
cnf* c = readDIMACS("/home/lev/Downloads/uf50/uf50-088.cnf");
/*
cnf* c = readDIMACS("/home/lev/Downloads/logistics/logistics.d.cnf");
sortlastnum(c);
printf("%u\n", c->cnts[0]);
gpusolve(c);
freecnf(c);
return 0;
*/
runTests();
return 0;
/*
// printf("%u\n", c->litcnt); // printf("%u\n", c->litcnt);
// for (u32 i = 0; i < c->clausecnt; ++i) printf("%u ", c->lastvars[i]); // for (u32 i = 0; i < c->clausecnt; ++i) printf("%u ", c->lastvars[i]);
@@ -282,6 +57,5 @@ int main() {
// if (res == 1) break; // if (res == 1) break;
freecnf(c); freecnf(c);
return 0;*/ return 0;
} }

438
ncnf.c
View File

@@ -1,7 +1,16 @@
#include "ncnf.h" #include "ncnf.h"
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
cnf* readDIMACS(char* path) { #define CHECK(X, Y) if (X == NULL) { \
cnf* c = malloc(sizeof(cnf)); printf(Y); \
return NULL; \
}
ncnf* nreadDIMACS(char* path) {
ncnf* c = malloc(sizeof(ncnf));
CHECK(c, "Failed to alloc CNF struct\n") CHECK(c, "Failed to alloc CNF struct\n")
FILE* f = fopen(path, "r"); FILE* f = fopen(path, "r");
@@ -13,24 +22,17 @@ cnf* readDIMACS(char* path) {
CHECK(fgets(buf, sizeof(char) * bufsize, f), "Failed to read file\n") CHECK(fgets(buf, sizeof(char) * bufsize, f), "Failed to read file\n")
// 'c' marks the beginning of a comment line in DIMACs. Skip this line
while (buf[0] == 'c') { while (buf[0] == 'c') {
CHECK(fgets(buf, sizeof(char) * bufsize, f), "Failed to read file\n") CHECK(fgets(buf, sizeof(char) * bufsize, f), "Failed to read file\n");
} }
// The first non-comment line is the header, should have format 'p cnf [varcnt] [clausecnt]
// We skip the first bit by iterating and skipping any character that's p, c, n, f, or space
char* temp = buf; char* temp = buf;
while (*temp == ' ' || *temp == 'p' || *temp == 'c' || *temp == 'n' || *temp == 'f') { while (*temp == ' ' || *temp == 'p' || *temp == 'c' || *temp == 'n' || *temp == 'f') temp++;
temp++;
}
// Char by char we read in the number of variables c->varcnt = 0;
u32* varcnt = &(c->cnts[0]);
*varcnt = 0;
while (((u8) (*temp - '0')) < 10) { while (((u8) (*temp - '0')) < 10) {
*varcnt *= 10; c->varcnt *= 10;
*varcnt += (*temp - '0'); c->varcnt += (*temp - '0');
temp++; temp++;
} }
@@ -38,71 +40,73 @@ cnf* readDIMACS(char* path) {
while (*temp == ' ') temp++; while (*temp == ' ') temp++;
// Read in clausecnt // Read in clausecnt
u32* clausecnt = &(c->cnts[1]); c->clausecnt = 0;
*clausecnt = 0;
while (((u8) (*temp - '0')) < 10) { while (((u8) (*temp - '0')) < 10) {
*clausecnt *= 10; c->clausecnt *= 10;
*clausecnt += (*temp - '0'); c->clausecnt += (*temp - '0');
temp++; temp++;
} }
// Resize line buffer: the maximum clause size is varcnt, the maximum size of a u32 in decimal u32 nsize = sizeof(char) * (c->varcnt * 12LU) + 10;
// is 10, and conservatively given 2 extra characters for the - symbol and space delimiting,
// resizing the buffer to this should mean we never encounter a clause we fail to read.
u32 nsize = sizeof(char) * ((*varcnt) * 12LU) + 10;
char* nbuf = realloc(buf, nsize); char* nbuf = realloc(buf, nsize);
CHECK(nbuf, "Failed to realloc read buffer\n") CHECK(nbuf, "Failed to realloc read buffer\n");
buf = nbuf; buf = nbuf;
u32 cnt = 0;
u32 cap = 32; u32 cap = 32;
c->totcnt = 0;
c->clausedat = calloc(*clausecnt, sizeof(u32) * 3); c->cptrs = malloc(sizeof(u32) * c->clausecnt);
CHECK(c->clausedat, "Failed to allocate clause data\n") CHECK(c->cptrs, "Failed to allocate clause offsets\n")
c->variables = calloc(cap, sizeof(u32)); c->clens = malloc(sizeof(u32) * c->clausecnt);
CHECK(c->variables, "Failed to allocate literal variables\n") CHECK(c->clens, "Failed to allocate clause lens\n")
c->parities = calloc(cap, sizeof(u8)); c->lvars = malloc(sizeof(u32) * c->clausecnt);
CHECK(c->parities, "Failed to allocate literal parities\n"); CHECK(c->lvars, "Failed to alloc clause lvars\n")
for (u32 i = 0; i < *clausecnt; ++i) { c->varbak = calloc(cap, sizeof(u32));
c->clausedat[3 * i] = cnt; CHECK(c->varbak, "Failed to allocate variable backing array\n")
CHECK(fgets(buf, nsize, f), "Failed to read clause\n"); c->parbak = malloc(sizeof(u8) * cap);
CHECK(c->parbak, "Failed to allocate parity backing array\n");
for (u32 i = 0; i < c->clausecnt; ++i) {
CHECK(fgets(buf, nsize, f), "Failed to read clause\n")
c->clens[i] = 0;
c->cptrs[i] = c->totcnt;
temp = buf; temp = buf;
while (*temp == ' ') temp++; while (*temp == ' ') temp++;
bool empty = true; bool empty = true;
bool tr = true; bool pos = true;
while (*temp != '\n') { while (*temp != '\n' && *temp != '\0') {
if (cnt == cap) { if (c->totcnt == cap) {
// Out of space to add more literals, realloc arrays
u32 ncap = cap << 1U; u32 ncap = cap << 1U;
c->variables = realloc(c->variables, sizeof(u32) * ncap); c->varbak = realloc(c->varbak, sizeof(u32) * ncap);
memset(c->variables + cap, 0, sizeof(u32) * cap); CHECK(c->varbak, "Failed to realloc variable backing array\n")
CHECK(c->variables, "Failed to realloc variable array\n"); memset(c->varbak + cap, 0, sizeof(u32) * cap);
c->parities = realloc(c->parities, sizeof(u8) * ncap); c->parbak = realloc(c->parbak, sizeof(u8) * ncap);
memset(c->parities + cap, 0, sizeof(u8) * cap); CHECK(c->parbak, "Failed to realloc parity backing array\n")
CHECK(c->parities, "Failed to realloc parity array\n");
cap = ncap; cap = ncap;
} }
if (*temp == '-') { if (*temp == '-') {
tr = false; pos = false;
} else if (((u8) (*temp - '0')) < 10) { } else if (((u8) (*temp - '0')) < 10) {
// Read in the literal's digits if (*temp == '0' && c->clens[i] == 0 && c->varbak[c->totcnt] == 0) {
c->variables[cnt] *= 10; break;
c->variables[cnt] += (*temp - '0'); }
c->varbak[c->totcnt] *= 10;
c->varbak[c->totcnt] += (*temp - '0');
} else { } else {
// Skip any whitespace and pack the read value into the arrays, along with
// any additional data
while (temp[1] == ' ') temp++; while (temp[1] == ' ') temp++;
c->parities[cnt] = tr; c->parbak[c->totcnt] = pos;
c->variables[cnt] -= 1; c->clens[i]++;
if (c->clausedat[3 * i + 2] < c->variables[cnt]) c->clausedat[3 * i + 2] = c->variables[cnt]; c->varbak[c->totcnt] -= 1;
if (c->lvars[i] < c->varbak[c->totcnt]) c->lvars[i] = c->varbak[c->totcnt];
empty = false; empty = false;
tr = true; pos = true;
cnt++; (c->totcnt)++;
if (temp[1] == '0') { if (temp[1] == '0') {
break; break;
} }
@@ -114,20 +118,17 @@ cnf* readDIMACS(char* path) {
printf("UNSAT: Empty clause %u\n", i); printf("UNSAT: Empty clause %u\n", i);
return NULL; return NULL;
} }
c->clausedat[3 * i + 1] = cnt - c->clausedat[3 * i];
} }
// Lastvars is set to the index of the last variable of each clause. However, because in the for (u32 i = 0; i < c->clausecnt; ++i) {
// counter, the last variable has the lsb position, we flip the value of lastvars c->lvars[i] += 1;
for (u32 i = 0; i < *clausecnt; ++i) { c->lvars[i] = c->varcnt - c->lvars[i];
c->clausedat[3 * i + 2] += 1U;
c->clausedat[3 * i + 2] = *varcnt - c->clausedat[3 * i + 2];
} }
// Realloc the arrays to exactly match the number of literals c->varbak = realloc(c->varbak, sizeof(u32) * c->totcnt);
c->variables = realloc(c->variables, sizeof(u32) * cnt); CHECK(c->varbak, "Failed to do final resize on variable backing array\n")
c->parities = realloc(c->parities, sizeof(u8) * cnt); c->parbak = realloc(c->parbak, sizeof(u8) * c->totcnt);
c->cnts[2] = cnt; CHECK(c->parbak, "Failed to do final resize on parity backing array\n")
free(buf); free(buf);
if (fclose(f)) { if (fclose(f)) {
printf("Failed to close file\n"); printf("Failed to close file\n");
@@ -136,76 +137,301 @@ cnf* readDIMACS(char* path) {
return c; return c;
} }
void printcnf(cnf* c) { void nprintcnf(ncnf* c) {
printf("p cnf %u %u\n", c->cnts[0], c->cnts[1]); printf("p cnf %u %u\n", c->varcnt, c->clausecnt);
for (u32 i = 0; i < c->cnts[1]; ++i) { for (u32 i = 0; i < c->clausecnt; ++i) {
for (u32 j = 0; j < c->clausedat[3 * i + 1]; ++j) { for (u32 j = 0; j < c->clens[i]; ++j) {
if (c->parities[c->clausedat[3 * i] + j] == 0) printf("-"); printf("%s%u ", (c->parbak[c->cptrs[i] + j]) ? ("") : ("-"), c->varbak[c->cptrs[i] + j] + 1);
printf("%u ", c->variables[c->clausedat[3 * i] + j] + 1);
} }
printf("0\n"); printf("0\n");
} }
printf("\n");
} }
void freecnf(cnf* c) { void nprintcnf2(ncnf* c) {
free(c->clausedat); printf("p cnf %u %u\n", c->varcnt, c->clausecnt);
free(c->variables); for (u32 i = 0; i < c->clausecnt; ++i) {
free(c->parities); printf("lvr: %u - ", c->lvars[i]);
for (u32 j = 0; j < c->clens[i]; ++j) {
printf("%s%u ", (c->parbak[c->cptrs[i] + j]) ? ("") : ("-"), c->varbak[c->cptrs[i] + j] + 1);
}
printf("0\n");
}
printf("\n");
}
void nfreecnf(ncnf* c) {
free(c->varbak);
free(c->parbak);
free(c->cptrs);
free(c->clens);
free(c->lvars);
free(c); free(c);
} }
const u64 RBITS = 8; const u32 RBITS2 = 8;
const u64 RSIZE = 1LU << RBITS; const u32 RSIZE2 = 1U << RBITS2;
const u64 RLVLS = (31LU / RBITS) + 1; const u32 RLVLS2 = (31U / RBITS2) + 1;
const u64 RMASK = RSIZE - 1; const u32 RMASK2 = RSIZE2 - 1;
void sortlastnum(cnf* c) { void nsortlastnum(ncnf* c) {
// Radix sort on the clauses based on their lvars
bool v = false; bool v = false;
u32* d = calloc(c->cnts[1], sizeof(u32) * 3); u32* d = malloc(sizeof(u32) * c->clausecnt);
u32* d2 = malloc(sizeof(u32) * c->clausecnt);
u32* d22 = malloc(sizeof(u32) * c->clausecnt);
u32 qb[RLVLS * RSIZE];
u32 qe[RLVLS * RSIZE];
memset(qb, 0, sizeof(u32) * RLVLS * RSIZE);
memset(qe, 0, sizeof(u32) * RLVLS * RSIZE);
for (u32 pass = 0; pass < RLVLS; ++pass) {
u32 shift = pass * RBITS;
for (u32 i = 0; i < c->cnts[1]; ++i) { u32 qb[RLVLS2 * RSIZE2];
u32 val = UINT32_MAX - c->clausedat[3 * i + 2]; u32 qe[RLVLS2 * RSIZE2];
u32 ind = (val >> shift) & RMASK; memset(qb, 0, sizeof(u32) * RLVLS2 * RSIZE2);
qe[pass * RSIZE + ind]++; memset(qe, 0, sizeof(u32) * RLVLS2 * RSIZE2);
for (u32 pass = 0; pass < RLVLS2; ++pass) {
u32 shift = pass * RBITS2;
for (u32 i = 0; i < c->clausecnt; ++i) {
u32 val = UINT32_MAX - c->lvars[i];
u32 ind = (val >> shift) & RMASK2;
qe[pass * RSIZE2 + ind]++;
} }
u32 uc = 0; u32 uc = 0;
for (u32 i = 0; i < RSIZE; ++i) if (qe[pass * RSIZE + i]) uc++; for (u32 i = 0; i < RSIZE2; ++i) uc += (qe[pass * RSIZE2 + i] != 0);
if (uc == 1) continue; if (uc == 1) continue;
qb[pass * RSIZE] = 0; qb[pass * RSIZE2] = 0;
for (u32 i = 1; i < RSIZE; ++i) { for (u32 i = 1; i < RSIZE2; ++i) {
qb[pass * RSIZE + i] = qb[pass * RSIZE + i - 1] + qe[pass * RSIZE + i - 1]; qb[pass * RSIZE2 + i] = qb[pass * RSIZE2 + i - 1] + qe[pass * RSIZE2 + i - 1];
} }
for (u32 i = 0; i < c->cnts[1]; ++i) { for (u32 i = 0; i < c->clausecnt; ++i) {
u32 val = UINT32_MAX - c->clausedat[3 * i + 2]; u32 val = UINT32_MAX - c->lvars[i];
u32 ind = (val >> shift) & RMASK; u32 ind = (val >> shift) & RMASK2;
memcpy(d + 3 * qb[pass * RSIZE + ind], c->clausedat + 3 * i, sizeof(u32) * 3); d[qb[pass * RSIZE2 + ind]] = c->cptrs[i];
qb[pass * RSIZE + ind]++; d2[qb[pass * RSIZE2 + ind]] = c->clens[i];
__builtin_prefetch(d + 3 * qb[pass * RSIZE + ind] + 1); d22[qb[pass * RSIZE2 + ind]] = c->lvars[i];
qb[pass * RSIZE2 + ind]++;
__builtin_prefetch(d2 + qb[pass * RSIZE2 + ind] + 1);
__builtin_prefetch(d + qb[pass * RSIZE2 + ind] + 1);
} }
u32* tptr = c->clausedat; u32* tptr = c->cptrs;
c->clausedat = d; c->cptrs = d;
d = tptr; d = tptr;
tptr = c->clens;
c->clens = d2;
d2 = tptr;
tptr = c->lvars;
c->lvars = d22;
d22 = tptr;
v = !v; v = !v;
} }
if (v) { if (v) {
u32* tptr = c->clausedat; u32* tptr = c->cptrs;
c->clausedat = d; c->cptrs = d;
d = tptr; d = tptr;
memcpy(c->clausedat, d, sizeof(u32) * 3 * c->cnts[1]); tptr = c->clens;
c->clens = d2;
d2 = tptr;
tptr = c->lvars;
c->lvars = d22;
d22 = tptr;
v = !v;
memcpy(c->cptrs, d, c->clausecnt * sizeof(u32));
memcpy(c->clens, d2, c->clausecnt * sizeof(u32));
memcpy(c->lvars, d22, c->clausecnt * sizeof(u32));
}
free(d2);
free(d22);
u8* d3 = malloc(sizeof(u8) * c->clausecnt);
// Radix sort the literals of each clause (for reasons, TM)
u32 dlen = c->clausecnt;
for (u32 k = 0; k < c->clausecnt; ++k) {
if (c->clens[k] > dlen) {
d = realloc(d, sizeof(u32) * c->clens[k]);
d3 = realloc(d3, sizeof(u8) * c->clens[k]);
if (d == NULL || d3 == NULL) {
printf("I'm giving up\n");
}
dlen = c->clens[k];
}
memset(qb, 0, sizeof(u32) * RLVLS2 * RSIZE2);
memset(qe, 0, sizeof(u32) * RLVLS2 * RSIZE2);
for (u32 pass = 0; pass < RLVLS2; ++pass) {
u32 shift = pass * RBITS2;
for (u32 i = 0; i < c->clens[k]; ++i) {
u32 val = c->varbak[c->cptrs[k] + i];
u32 ind = (val >> shift) & RMASK2;
qe[pass * RSIZE2 + ind]++;
}
u32 uc = 0;
for (u32 i = 0; i < RSIZE2; ++i) uc += (qe[pass * RSIZE2 + i] != 0);
if (uc == 1) continue;
qb[pass * RSIZE2] = 0;
for (u32 i = 1; i < RSIZE2; ++i) {
qb[pass * RSIZE2 + i] = qb[pass * RSIZE2 + i - 1] + qe[pass * RSIZE2 + i - 1];
}
for (u32 i = 0; i < c->clens[k]; ++i) {
u32 val = c->varbak[c->cptrs[k] + i];
u32 ind = (val >> shift) & RMASK2;
d[qb[pass * RSIZE2 + ind]] = c->varbak[c->cptrs[k] + i];
d3[qb[pass * RSIZE2 + ind]] = c->parbak[c->cptrs[k] + i];
qb[pass * RSIZE2 + ind]++;
__builtin_prefetch(d + qb[pass * RSIZE2 + ind] + 1);
__builtin_prefetch(d3 + qb[pass * RSIZE2 + ind] + 1);
}
memcpy(c->varbak + c->cptrs[k], d, sizeof(u32) * c->clens[k]);
memcpy(c->parbak + c->cptrs[k], d3, sizeof(u8) * c->clens[k]);
}
} }
free(d); free(d);
free(d3);
}
void nsort(ncnf* c) {
bool v = false;
u32 dlen = c->clens[0];
u32* d = malloc(sizeof(u32) * dlen);
u8* dpar = malloc(sizeof(u8) * dlen);
u32 qb[RLVLS2 * RSIZE2];
u32 qe[RLVLS2 * RSIZE2];
for (u32 k = 0; k < c->clausecnt; ++k) {
if (c->clens[k] > dlen) {
d = realloc(d, sizeof(u32) * c->clens[k]);
dpar = realloc(dpar, sizeof(u8) * c->clens[k]);
if (d == NULL || dpar == NULL) {
printf("I'm giving up\n");
}
dlen = c->clens[k];
}
memset(qb, 0, sizeof(u32) * RLVLS2 * RSIZE2);
memset(qe, 0, sizeof(u32) * RLVLS2 * RSIZE2);
for (u32 pass = 0; pass < RLVLS2; ++pass) {
u32 shift = pass * RBITS2;
for (u32 i = 0; i < c->clens[k]; ++i) {
u32 val = c->varbak[c->cptrs[k] + i];
u32 ind = (val >> shift) & RMASK2;
qe[pass * RSIZE2 + ind]++;
}
u32 uc = 0;
for (u32 i = 0; i < RSIZE2; ++i) uc += (qe[pass * RSIZE2 + i] != 0);
if (uc == 1) continue;
qb[pass * RSIZE2] = 0;
for (u32 i = 1; i < RSIZE2; ++i) {
qb[pass * RSIZE2 + i] = qb[pass * RSIZE2 + i - 1] + qe[pass * RSIZE2 + i - 1];
}
for (u32 i = 0; i < c->clens[k]; ++i) {
u32 val = c->varbak[c->cptrs[k] + i];
u32 ind = (val >> shift) & RMASK2;
d[qb[pass * RSIZE2 + ind]] = c->varbak[c->cptrs[k] + i];
dpar[qb[pass * RSIZE2 + ind]] = c->parbak[c->cptrs[k] + i];
qb[pass * RSIZE2 + ind]++;
__builtin_prefetch(d + qb[pass * RSIZE2 + ind] + 1);
__builtin_prefetch(dpar + qb[pass * RSIZE2 + ind] + 1);
}
memcpy(c->varbak + c->cptrs[k], d, sizeof(u32) * c->clens[k]);
memcpy(c->parbak + c->cptrs[k], dpar, sizeof(u8) * c->clens[k]);
}
}
free(dpar);
/*
if (c->clausecnt > dlen) {
d = realloc(d, sizeof(u32) * c->clausecnt);
if (d == NULL) {
printf("I'm giving up x2\n");
}
dlen = c->clausecnt;
}
u32* d2 = malloc(sizeof(u32) * c->clausecnt);
u32* d3 = malloc(sizeof(u32) * c->clausecnt);
memset(qb, 0, sizeof(u32) * RLVLS2 * RSIZE2);
memset(qe, 0, sizeof(u32) * RLVLS2 * RSIZE2);
for (u32 pass = 0; pass < RLVLS2; ++pass) {
u32 shift = pass * RBITS2;
for (u32 i = 0; i < c->clausecnt; ++i) {
u32 val = c->varbak[c->cptrs[i]];
u32 ind = (val >> shift) & RMASK2;
qe[pass * RSIZE2 + ind]++;
}
u32 uc = 0;
for (u32 i = 0; i < RSIZE2; ++i) uc += (qe[pass * RSIZE2 + i] != 0);
if (uc == 1) continue;
qb[pass * RSIZE2] = 0;
for (u32 i = 1; i < RSIZE2; ++i) {
qb[pass * RSIZE2 + i] = qb[pass * RSIZE2 + i - 1] + qe[pass * RSIZE2 + i - 1];
}
for (u32 i = 0; i < c->clausecnt; ++i) {
u32 val = c->varbak[c->cptrs[i]];
u32 ind = (val >> shift) & RMASK2;
d[qb[pass * RSIZE2 + ind]] = c->cptrs[i];
d2[qb[pass * RSIZE2 + ind]] = c->clens[i];
d3[qb[pass * RSIZE2 + ind]] = c->lvars[i];
qb[pass * RSIZE2 + ind]++;
__builtin_prefetch(d2 + qb[pass * RSIZE2 + ind] + 1);
__builtin_prefetch(d + qb[pass * RSIZE2 + ind] + 1);
}
u32* tptr = c->cptrs;
c->cptrs = d;
d = tptr;
tptr = c->clens;
c->clens = d2;
d2 = tptr;
tptr = c->lvars;
c->lvars = d3;
d3 = tptr;
v = !v;
}
if (v) {
u32* tptr = c->cptrs;
c->cptrs = d;
d = tptr;
tptr = c->clens;
c->clens = d2;
d2 = tptr;
tptr = c->lvars;
c->lvars = d3;
d3 = tptr;
v = !v;
memcpy(c->cptrs, d, c->clausecnt * sizeof(u32));
memcpy(c->clens, d2, c->clausecnt * sizeof(u32));
memcpy(c->lvars, d3, c->clausecnt * sizeof(u32));
}
*/
free(d);
//free(d2);
// free(d3);
} }

32
ncnf.h
View File

@@ -1,25 +1,23 @@
#pragma once #pragma once
#include "types.h" #include "types.h"
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#define CHECK(X, Y) if (X == NULL) { \
printf(Y); \
return NULL; \
}
typedef struct { typedef struct {
u32 cnts[3]; // { varcnt, clausecnt } u32 varcnt;
u32* clausedat; // { ind, len, jval } u32 clausecnt;
u32* variables; u32* lvars;
u8* parities; u32* clens;
} cnf; u32* cptrs;
u32 totcnt;
u32* varbak;
u8* parbak;
} ncnf;
cnf* readDIMACS(char* path); ncnf* nreadDIMACS(char* path);
void printcnf(cnf* c); void nprintcnf(ncnf* c);
void nprintcnf2(ncnf* c);
void sortlastnum(cnf* c); void nsortlastnum(ncnf* c);
void nsort(ncnf* c);
void freecnf(cnf* c); void nfreecnf(ncnf* c);

192
psat.cl
View File

@@ -32,122 +32,120 @@ static inline void stateaddpow(uint wcnt, uint* state, uint pow) {
} }
} }
void mul(uint* c, uint len, uint* a, uint b) { __kernel void vectorSAT(__global const uint* cnfheader, __global const uint* lvars, __global const uint* vars, __global const uint* clauses, __global const uchar* pars, __global uint* output, __global uchar* scratchpad, __local uint* maxvals) {
uint carry = 0; output[0] = 2;
for (uint i = 0; i < len; ++i) {
uint ncarry;
uint blo = a[i] & 0xFFFFU;
uint bhi = a[i] >> 16U;
uint ilo = b & 0xFFFFU;
uint ihi = b >> 16U;
*(c + i) = ilo * blo; __local uint setmax;
uint b1 = ilo * bhi;
uint c1 = ihi * blo;
ncarry = ihi * bhi;
b1 += c1; uint cnt = cnfheader[0];
ncarry += (b1 < c1) << 16U; uint vcnt = cnfheader[1];
uint bblo = b1 & 0xFFFFU; uint ccnt = cnfheader[2];
uint bbhi = b1 >> 16U;
bblo <<= 16U; uint wcnt = 1 + (vcnt >> 5U);
*(c + i) += bblo;
uchar acarry = *(c + i) < bblo; uint maxctr = 1U << (vcnt & 0b11111U);
//uint glbid = get_global_id(0);
//uint glbsz = get_global_size(0);
ncarry += bbhi + acarry;
c[i] += carry;
ncarry += c[i] < carry;
carry = ncarry;
}
}
__kernel void vectorSAT(__global const uint* cnfhdr, __global const uint* clausedat, __global const uint* vars, __global const uchar* pars, __global uint* output, __local uint* lctrs) {
uint locid = get_local_id(0); uint locid = get_local_id(0);
uint locsz = get_local_size(0); uint locsz = get_local_size(0);
// uint grpid = get_group_id(0); // uint grpid = get_group_id(0);
// uint grpcn = get_num_groups(0); // uint grpcn = get_num_groups(0);
uint globid = get_global_id(0);
uint globsz = get_global_size(0);
uint wcnt = 1U + (cnfhdr[0] >> 5U); // Zero out the counter
for (uint i = 0; i < wcnt; ++i) output[i + 1] = 0;
uint mode = 2; // Set all scratchpad clauses to true
uint index = 0; for (uint j = 0; j < ccnt; j += locsz) {
uint addval = 0; uchar cond = (j + locid) < ccnt;
j = j * cond + (!cond) * (ccnt - locid - 1);
scratchpad[j + locid] = 1;
if (globid == 0) output[0] = 0;
uint* ctr = lctrs + wcnt * 2 * locid;
uint* max = lctrs + wcnt * (2 * locid + 1);
for (uint i = 0; i < wcnt; ++i) {
ctr[i] = max[i] = 0;
} }
mul(ctr, wcnt, output + 1, globid); __local uint firstind[1];
if (globid == globsz) { while (output[0] == 2) {
stateaddpow(wcnt, max, cnfhdr[0]); firstind[0] = ccnt;
} else {
mul(max, wcnt, output + 1, globid + 1); setmax = 0;
} uint maxnumx = 0;
for (uint j = 0; j < cnt; j += locsz) {
uchar cond = (j + locid) < cnt;
// Last element cap
j = j * cond + (!cond) * (cnt - locid - 1);
uint varind = vars[j + locid];
varind = (vcnt - 1) - varind;
uint iind = varind >> 5U;
uint bind = varind & 0b11111U;
uchar cpar = (output[iind + 1] >> bind) & 1U;
if (cpar != pars[j + locid]) {
scratchpad[clauses[j + locid]] = 0;
}
}
barrier(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE);
barrier(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE); for (uint j = 0; j < ccnt; j += locsz) {
if (scratchpad[j + locid] == 1 && (j + locid) < ccnt) {
setmax = 1;
}
}
uint varcnt = cnfhdr[0] - 1; barrier(CLK_LOCAL_MEM_FENCE);
while (output[0] == 0) {
uint chkmsk = 0xFFFFFFFFU * (mode == 2U); if (setmax) {
uint chkcls = index & chkmsk; // Set maxval array to zero
uint chkind = clausedat[3 * chkcls] + (addval & chkmsk); maxvals[locid] = 0;
uint var = vars[chkind];
uchar par = pars[chkind];
uint vword = (varcnt - var) >> 5U;
uint vbit = (varcnt - var) & 0b11111U;
uchar corpar = (ctr[vword] >> vbit) & 1U;
uchar isvalid = (par == corpar);
uchar islvar = ((addval + 1) == clausedat[3 * chkcls + 1]);
uchar isbchk0 = (mode == 2U);
uchar isbchk1 = isbchk0 & isvalid;
uchar isbchk2 = isbchk1 & islvar;
uint j = clausedat[3 * chkcls + 2];
mode -= 2 * isbchk2;
// if (isbchk2) printf("j: %u\n", j);
index = (j >> 5U) * isbchk2 + index * (!isbchk2);
addval = (1U << (j & 0b11111U)) * isbchk2 + addval * (!isbchk2);
addval += ((isbchk1) & (!islvar));
uchar isbchk3 = (isbchk0 & (!isvalid));
addval *= (!isbchk3);
index += (isbchk3);
uchar issat = (index == cnfhdr[1]) * (isbchk3);
uint cmpaddind = index * (mode != 2U); // Accumulate and reduce the maximums
uint nval = ctr[cmpaddind] + addval; // Find the result of the current step if it was addition for (uint j = 0; j < ccnt; j += locsz) {
addval = (nval < ctr[cmpaddind]) * (mode == 0) + (addval) * (mode == 2U); // If in add mode, set addval to carry. If in cmp mode, set to 0. If in check mode, leave alone. //uint a = maxvals[locid];
ctr[cmpaddind] = nval * ((mode == 0) & !issat) + ctr[cmpaddind] * ((mode != 0) | issat); // If in add mode, set new ctr val, otherwise leave unchanged //uint b = lvars[j + locid];
addval -= (ctr[cmpaddind] < max[cmpaddind]) * (mode == 1); // If in comparison mode, decrement addval if less than // uint c = max(a, b);
addval += (ctr[cmpaddind] > max[cmpaddind]) * (mode == 1); // If in comparison mode, increment addval if greater than if ((j + locid) < ccnt && scratchpad[j + locid] == 1) {
uchar addcond = (addval == 0) | (cmpaddind == (wcnt - 1)); // Exit condition for the ADD state: If addval is zero (no carry) or we're at the last word //maxvals[locid] = c;
uchar cmpcond = (addval != 0) | (cmpaddind == 0); // Exit condition for the CMP state: if addval is nonzero (lt or gt) or we're at the least significant word atomic_min(firstind, (j + locid));
uchar exittime = (mode == 1) & cmpcond & (addval != -1);
exittime |= issat;
if (exittime) { // If in cmpmode and the comparison result is not less than, unsat
if (issat) {
if (atomic_cmpxchg(output, 0, 1) == 0) {
for (uint i = 0; i < wcnt; ++i) {
output[i + 1] = ~ctr[i];
}
output[0] = 1;
} }
} }
return;
barrier(CLK_LOCAL_MEM_FENCE);
uint maxj = lvars[firstind[0]];
// Set all scratchpad clauses to true
for (uint j = 0; j < ccnt; j += locsz) {
uchar cond = (j + locid) < ccnt;
j = j * cond + (!cond) * (ccnt - locid - 1);
scratchpad[j + locid] = 1;
}
// Final reduction pass
/*
uint maxj = maxvals[0];
for (uint j = 1; j < locsz; ++j) {
maxj = max(maxj, maxvals[j]);
}
*/
// Add to the counter
if (locid == 0) {
stateaddpow(wcnt, output + 1, maxj);
}
if (output[wcnt] >= maxctr) {
output[0] = 1;
}
} else {
output[0] = 0;
if (locid == 0) {
for (uint i = 0; i < wcnt; ++i) output[i + 1] = ~output[i + 1];
}
} }
uchar cmpdone = cmpcond & (mode == 1); // if comparison completion conditions are satisfied and in CMP mode barrier(CLK_LOCAL_MEM_FENCE);
uint addindex = (cmpaddind + 1) * !addcond + (wcnt - 1) * addcond; // if add completion is satisfied, set index to most significant word, else increment by 1
index = addindex * (mode == 0) + (index - (mode == 1)) * (mode != 0); // If in add mode, use addindex; if in cmp mode, decrement index by 1
index *= !cmpdone;
addval *= !(((addcond) & (mode == 0)) | cmpdone); // If add is complete, or cmp is complete, zero. Else leave unchanged.
mode += addcond * (mode == 0) + cmpdone; // If in add mode and add completion is reached, increment mode. If in cmp mode and cmp completion reached, increment mode.
} }
} }

121
rng.c
View File

@@ -1,121 +0,0 @@
#include "rng.h"
#include <string.h>
#include "time.h"
#include "stdlib.h"
static inline u64 rotl(const u64 x, i32 k) {
return (x << k) | (x >> (64 - k));
}
void seed(rngstate* s, u64 seed) {
/* Unrolled & modified version of splitmix64
* https://prng.di.unimi.it/splitmix64.c */
u64 z = (seed += 0x9e3779b97f4a7c15);
z = (z ^ (z >> 30)) * 0xbf58476d1ce4e5b9;
z = (z ^ (z >> 27)) * 0x94d049bb133111eb;
s->state[0] = z ^ (z >> 31);
z = (seed += 0x9e3779b97f4a7c15);
z = (z ^ (z >> 30)) * 0xbf58476d1ce4e5b9;
z = (z ^ (z >> 27)) * 0x94d049bb133111eb;
s->state[1] = z ^ (z >> 31);
z = (seed += 0x9e3779b97f4a7c15);
z = (z ^ (z >> 30)) * 0xbf58476d1ce4e5b9;
z = (z ^ (z >> 27)) * 0x94d049bb133111eb;
s->state[2] = z ^ (z >> 31);
z = (seed += 0x9e3779b97f4a7c15);
z = (z ^ (z >> 30)) * 0xbf58476d1ce4e5b9;
z = (z ^ (z >> 27)) * 0x94d049bb133111eb;
s->state[3] = z ^ (z >> 31);
z = (seed += 0x9e3779b97f4a7c15);
z = (z ^ (z >> 30)) * 0xbf58476d1ce4e5b9;
z = (z ^ (z >> 27)) * 0x94d049bb133111eb;
s->state[4] = z ^ (z >> 31);
z = (seed += 0x9e3779b97f4a7c15);
z = (z ^ (z >> 30)) * 0xbf58476d1ce4e5b9;
z = (z ^ (z >> 27)) * 0x94d049bb133111eb;
s->state[5] = z ^ (z >> 31);
z = (seed += 0x9e3779b97f4a7c15);
z = (z ^ (z >> 30)) * 0xbf58476d1ce4e5b9;
z = (z ^ (z >> 27)) * 0x94d049bb133111eb;
s->state[6] = z ^ (z >> 31);
z = (seed + 0x9e3779b97f4a7c15);
z = (z ^ (z >> 30)) * 0xbf58476d1ce4e5b9;
z = (z ^ (z >> 27)) * 0x94d049bb133111eb;
s->state[7] = z ^ (z >> 31);
s->ready = true;
}
u64 ru64(rngstate* s) {
if (!s->ready) seed(s, utime());
const u64 res = rotl(s->state[1] * 5, 7) * 9;
const u64 t = s->state[1] << 11U;
s->state[2] ^= s->state[0];
s->state[5] ^= s->state[1];
s->state[1] ^= s->state[2];
s->state[7] ^= s->state[3];
s->state[3] ^= s->state[4];
s->state[4] ^= s->state[5];
s->state[0] ^= s->state[6];
s->state[6] ^= s->state[7];
s->state[6] ^= t;
s->state[7] = rotl(s->state[7], 21);
return res;
}
u64 ru64pow(rngstate* s, u64 max) {
u64 v = ru64(s) % max;
v++;
return max / v;
}
u32 ru32(rngstate* s) {
return (u32) ru64(s);
}
f64 rf64(rngstate* s) {
return ((f64) ru64(s)) / ((f64) UINT64_MAX);
}
f32 rf32(rngstate* s) {
return ((f32) ru32(s)) / ((f32) UINT32_MAX);
}
void jump(rngstate* s) {
static const u64 JUMP[] = { 0x33ed89b6e7a353f9, 0x760083d7955323be, 0x2837f2fbb5f22fae, 0x4b8c5674d309511c, 0xb11ac47a7ba28c25, 0xf1be7667092bcc1c, 0x53851efdb6df0aaf, 0x1ebbc8b23eaf25db };
u64 t[8];
memset(t, 0, sizeof(u64) * 8);
for (i32 i = 0; i < 8; ++i) {
for (i32 b = 0; b < 64; ++b) {
if (JUMP[i] & 1LU << b) for (i32 w = 0; w < 8; ++w) t[w] ^= s->state[w];
ru64(s);
}
}
memcpy(s->state, t, sizeof(u64) * 8);
}
void longjump(rngstate* s) {
static const u64 LONGJUMP[] = { 0x11467fef8f921d28, 0xa2a819f2e79c8ea8, 0xa8299fc284b3959a, 0xb4d347340ca63ee1, 0x1cb0940bedbff6ce, 0xd956c5c4fa1f8e17, 0x915e38fd4eda93bc, 0x5b3ccdfa5d7daca5 };
u64 t[8];
memset(t, 0, sizeof(u64) * 8);
for (i32 i = 0; i < 8; ++i) {
for (i32 b = 0; b < 64; ++b) {
if (LONGJUMP[i] & 1LU << b) for (i32 w = 0; w < 8; ++w) t[w] ^= s->state[w];
ru64(s);
}
}
memcpy(s->state, t, sizeof(u64) * 8);
}

15
rng.h
View File

@@ -1,15 +0,0 @@
#pragma once
#include "types.h"
typedef struct {
bool ready;
u64 state[8];
} rngstate;
void seed(rngstate* s, u64 seed);
u64 ru64(rngstate* s);
u32 ru32(rngstate* s);
f64 rf64(rngstate* s);
f32 rf32(rngstate* s);
void jump(rngstate* s);
u64 ru64pow(rngstate* s, u64 max);

6
subtree.c Normal file
View File

@@ -0,0 +1,6 @@
#include "subtree.h"
#include "ncnf.h"
void brzozowski(ncnf* c) {
}

89
subtree.h Normal file
View File

@@ -0,0 +1,89 @@
#pragma once
#include "types.h"
#include <stdlib.h>
/*
void sortstrings(char** list, u32 len, u32 cnt) {
bool v = false;
u32* d = malloc(sizeof(u32) * c->litcnt);
u32* d2 = malloc(sizeof(u32) * c->litcnt);
u8* d3 = malloc(sizeof(u8) * c->litcnt);
u64 qb[RLVLS * RSIZE];
u64 qe[RLVLS * RSIZE];
memset(qb, 0, sizeof(u64) * RLVLS * RSIZE);
memset(qe, 0, sizeof(u64) * RLVLS * RSIZE);
for (u64 pass = 0; pass < RLVLS; ++pass) {
u64 shift = pass * RBITS;
for (u64 i = 0; i < N; ++i) {
// U32MAX - because we want to sort highest to lowest, not lowest to highest
u32 val = UINT32_MAX - c->lastvars[c->clauses[i]];
u64 ind = (val >> shift) & RMASK;
qe[pass * RSIZE + ind]++;
}
u64 uc = 0;
for (u64 i = 0; i < RSIZE; ++i) if (qe[pass * RSIZE + i]) uc++;
if (uc == 1) continue;
qb[pass * RSIZE] = 0;
for (u64 i = 1; i < RSIZE; ++i) {
qb[pass * RSIZE + i] = qb[pass * RSIZE + i - 1] + qe[pass * RSIZE + i - 1];
}
for (u64 i = 0; i < N; ++i) {
u32 val = UINT32_MAX - c->lastvars[c->clauses[i]];
u64 ind = (val >> shift) & RMASK;
d[qb[pass * RSIZE + ind]] = c->variables[i];
d2[qb[pass * RSIZE + ind]] = c->clauses[i];
d3[qb[pass * RSIZE + ind]] = c->pars[i];
qb[pass * RSIZE + ind]++;
__builtin_prefetch(d + qb[pass * RSIZE + ind] + 1);
__builtin_prefetch(d2 + qb[pass * RSIZE + ind] + 1);
__builtin_prefetch(d3 + qb[pass * RSIZE + ind] + 1);
}
// Every iteration we swap pointers to avoid doing a memcpy
u32* tptr = c->variables;
c->variables = d;
d = tptr;
tptr = c->clauses;
c->clauses = d2;
d2 = tptr;
u8* tptr2 = c->pars;
c->pars = d3;
d3 = tptr2;
v = !v;
}
// If the pointers are still swapped at the end, we swap them to their original locations and
// copy over the result
if (v) {
u32* tptr = c->variables;
c->variables = d;
d = tptr;
tptr = c->clauses;
c->clauses = d2;
d2 = tptr;
u8* tptr2 = c->pars;
c->pars = d3;
d3 = tptr2;
memcpy(c->variables, d, N * sizeof(u32));
memcpy(c->clauses, d2, N * sizeof(u32));
memcpy(c->pars, d3, N * sizeof(u8));
}
free(d);
free(d2);
free(d3);
// Initialize an array with all the clause indices in their current order
u32* swaparr = malloc(sizeof(u32) * c->clausecnt);
for (u32 i = 0; i < c->clausecnt; ++i) {
swaparr[i] = i;
}
}
*/

View File

@@ -24,7 +24,7 @@ i32 runTests() {
i32 runuf20() { i32 runuf20() {
// printf("Running against uf20\n"); printf("Running against uf20\n");
u32 passed = 0; u32 passed = 0;
u64 tottime = 0; u64 tottime = 0;
for (u32 i = 0; i < 1000; ++i) { for (u32 i = 0; i < 1000; ++i) {
@@ -33,59 +33,7 @@ i32 runuf20() {
cnf* c = readDIMACS(buf); cnf* c = readDIMACS(buf);
sortlastnum(c); sortlastnum(c, c->litcnt);
u64 start = utime();
i32 res = gpusolve(c);
u64 stop = utime();
tottime += (stop - start);
freecnf(c);
if (res == 1) passed++;
}
// printf("Passed %u / 1000 tests\n", passed);
// printf("Took %lf s total, %lf s on avg\n", ((f64) tottime) / 1000000.0, ((f64) tottime) / 1000000000.0);
if (passed == 1000) return 0;
return 1;
}
i32 runuf50() {
// printf("Running against uf50\n");
u32 passed = 0;
u64 tottime = 0;
for (u32 i = 0; i < 1000; ++i) {
char buf[128];
i32 len = sprintf(buf, "/home/lev/Downloads/uf50/uf50-0%u.cnf", i + 1);
cnf* c = readDIMACS(buf);
sortlastnum(c);
u64 start = utime();
i32 res = gpusolve(c);
u64 stop = utime();
tottime += (stop - start);
freecnf(c);
if (res == 1) passed++;
}
// printf("Passed %u / 1000 tests\n", passed);
// printf("Took %lf s total, %lf s on avg\n", ((f64) tottime) / 1000000.0, ((f64) tottime) / 1000000000.0);
if (passed == 1000) return 0;
return 1;
}
i32 runuuf50() {
// printf("Running against uuf50\n");
u32 passed = 0;
u64 tottime = 0;
for (u32 i = 0; i < 1000; ++i) {
char buf[128];
i32 len = sprintf(buf, "/home/lev/Downloads/uuf50/uuf50-0%u.cnf", i + 1);
cnf* c = readDIMACS(buf);
sortlastnum(c);
u64 start = utime(); u64 start = utime();
i32 res = gpusolve(c); i32 res = gpusolve(c);
@@ -95,8 +43,60 @@ i32 runuuf50() {
freecnf(c); freecnf(c);
if (res == 0) passed++; if (res == 0) passed++;
} }
// printf("Passed %u / 1000 tests\n", passed); printf("Passed %u / 1000 tests\n", passed);
// printf("Took %lf s total, %lf s on avg\n", ((f64) tottime) / 1000000.0, ((f64) tottime) / 1000000000.0); printf("Took %lf s total, %lf s on avg\n", ((f64) tottime) / 1000000.0, ((f64) tottime) / 1000000000.0);
if (passed == 1000) return 0;
return 1;
}
i32 runuf50() {
printf("Running against uf50\n");
u32 passed = 0;
u64 tottime = 0;
for (u32 i = 0; i < 1000; ++i) {
char buf[128];
i32 len = sprintf(buf, "/home/lev/Downloads/uf50/uf50-0%u.cnf", i + 1);
cnf* c = readDIMACS(buf);
sortlastnum(c, c->litcnt);
u64 start = utime();
i32 res = gpusolve(c);
u64 stop = utime();
tottime += (stop - start);
freecnf(c);
if (res == 0) passed++;
}
printf("Passed %u / 1000 tests\n", passed);
printf("Took %lf s total, %lf s on avg\n", ((f64) tottime) / 1000000.0, ((f64) tottime) / 1000000000.0);
if (passed == 1000) return 0;
return 1;
}
i32 runuuf50() {
printf("Running against uuf50\n");
u32 passed = 0;
u64 tottime = 0;
for (u32 i = 0; i < 1000; ++i) {
char buf[128];
i32 len = sprintf(buf, "/home/lev/Downloads/uuf50/uuf50-0%u.cnf", i + 1);
cnf* c = readDIMACS(buf);
sortlastnum(c, c->litcnt);
u64 start = utime();
i32 res = gpusolve(c);
u64 stop = utime();
tottime += (stop - start);
freecnf(c);
if (res == 1) passed++;
}
printf("Passed %u / 1000 tests\n", passed);
printf("Took %lf s total, %lf s on avg\n", ((f64) tottime) / 1000000.0, ((f64) tottime) / 1000000000.0);
if (passed == 1000) return 0; if (passed == 1000) return 0;
return 1; return 1;
} }