Initial commit.
This commit is contained in:
9
CMakeLists.txt
Normal file
9
CMakeLists.txt
Normal file
@@ -0,0 +1,9 @@
|
||||
cmake_minimum_required(VERSION 3.22)
|
||||
project(psat C)
|
||||
|
||||
set(CMAKE_C_STANDARD 99)
|
||||
set(CMAKE_C_FLAGS "-O3")
|
||||
|
||||
add_executable(psat main.c cnf.c cnf.h time.c time.h types.h gpusolver.c gpusolver.h sstree.c sstree.h cpusolver.c cpusolver.h)
|
||||
|
||||
target_link_libraries(psat -lOpenCL)
|
||||
295
cnf.c
Normal file
295
cnf.c
Normal file
@@ -0,0 +1,295 @@
|
||||
#include "cnf.h"
|
||||
|
||||
cnf* readDIMACS(char* path) {
|
||||
cnf* c = malloc(sizeof(cnf));
|
||||
CHECK(c, "Failed to alloc CNF struct\n");
|
||||
|
||||
FILE* f = fopen(path, "r");
|
||||
CHECK(f, "Failed to open file\n");
|
||||
|
||||
u32 bufsize = 1000;
|
||||
char* buf = malloc(sizeof(char) * bufsize);
|
||||
CHECK(buf, "Failed to alloc read buffer\n");
|
||||
|
||||
CHECK(fgets(buf, sizeof(char) * bufsize, f), "Failed to read file\n");
|
||||
|
||||
while (buf[0] == 'c') {
|
||||
CHECK(fgets(buf, sizeof(char) * bufsize, f), "Failed to read file\n");
|
||||
}
|
||||
|
||||
char* temp = buf;
|
||||
while (*temp == ' ' || *temp == 'p' || *temp == 'c' || *temp == 'n' || *temp == 'f') {
|
||||
temp++;
|
||||
}
|
||||
|
||||
c->varcnt = 0;
|
||||
while (((u8) (*temp - '0')) < 10) {
|
||||
c->varcnt *= 10;
|
||||
c->varcnt += (*temp - '0');
|
||||
temp++;
|
||||
}
|
||||
|
||||
while (*temp == ' ') temp++;
|
||||
|
||||
c->clausecnt = 0;
|
||||
while (((u8) (*temp - '0')) < 10) {
|
||||
c->clausecnt *= 10;
|
||||
c->clausecnt += (*temp - '0');
|
||||
temp++;
|
||||
}
|
||||
|
||||
u32 nsize = sizeof(char) * (c->varcnt * 12LU) + 10;
|
||||
char* nbuf = realloc(buf, nsize);
|
||||
CHECK(nbuf, "Failed to realloc read buffer\n");
|
||||
|
||||
buf = nbuf;
|
||||
|
||||
u32 cnt = 0;
|
||||
u32 cap = 32;
|
||||
|
||||
c->variables = calloc( cap, sizeof(u32));
|
||||
CHECK(c->variables, "Failed to allocate literal variables\n");
|
||||
c->clauses = calloc( cap, sizeof(u32));
|
||||
CHECK(c->clauses, "Failed to allocate literal clauses\n");
|
||||
c->pars = calloc( cap, sizeof(u8));
|
||||
CHECK(c->pars, "Failed to allocate literal parities\n");
|
||||
c->lastvars = calloc( c->clausecnt, sizeof(u32));
|
||||
CHECK(c->lastvars, "Failed to allocate clause lastvars\n");
|
||||
|
||||
|
||||
for (u32 i = 0; i < c->clausecnt; ++i) {
|
||||
CHECK(fgets(buf, nsize, f), "Failed to read clause\n");
|
||||
|
||||
|
||||
|
||||
temp = buf;
|
||||
while (*temp == ' ') temp++;
|
||||
|
||||
bool empty = true;
|
||||
bool tr = true;
|
||||
while (*temp != '\n') {
|
||||
if (cnt == cap) {
|
||||
u32 ncap = cap << 1U;
|
||||
c->variables = realloc(c->variables, sizeof(u32) * ncap);
|
||||
memset(c->variables + cap, 0, sizeof(u32) * cap);
|
||||
CHECK(c->variables, "Failed to realloc variable array\n");
|
||||
c->clauses = realloc(c->clauses, sizeof(u32) * ncap);
|
||||
memset(c->clauses + cap, 0, sizeof(u32) * cap);
|
||||
CHECK(c->clauses, "Failed to realloc clause array\n");
|
||||
c->pars = realloc(c->pars, sizeof(u8) * ncap);
|
||||
memset(c->pars + cap, 0, sizeof(u8) * cap);
|
||||
CHECK(c->pars, "Failed to realloc parity array\n");
|
||||
cap = ncap;
|
||||
}
|
||||
|
||||
if (*temp == '-') {
|
||||
tr = false;
|
||||
} else if (((u8) (*temp - '0')) < 10) {
|
||||
c->variables[cnt] *= 10;
|
||||
c->variables[cnt] += (*temp - '0');
|
||||
} else {
|
||||
while (temp[1] == ' ') temp++;
|
||||
c->pars[cnt] = tr;
|
||||
c->clauses[cnt] = i;
|
||||
c->variables[cnt] -= 1;
|
||||
if (c->lastvars[i] < c->variables[cnt]) c->lastvars[i] = c->variables[cnt];
|
||||
empty = false;
|
||||
tr = true;
|
||||
cnt++;
|
||||
if (temp[1] == '0') {
|
||||
break;
|
||||
}
|
||||
}
|
||||
temp++;
|
||||
}
|
||||
|
||||
if (empty) {
|
||||
printf("UNSAT: Empty clause %u\n", i);
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
for (u32 i = 0; i < c->clausecnt; ++i) {
|
||||
c->lastvars[i] += 1U;
|
||||
c->lastvars[i] = c->varcnt - c->lastvars[i];
|
||||
}
|
||||
|
||||
c->litcnt = cnt;
|
||||
|
||||
c->variables = realloc(c->variables, sizeof(u32) * c->litcnt);
|
||||
c->clauses = realloc(c->clauses, sizeof(u32) * c->litcnt);
|
||||
c->pars = realloc(c->pars, sizeof(u8) * c->litcnt);
|
||||
free(buf);
|
||||
if (fclose(f)) {
|
||||
printf("Failed to close file\n");
|
||||
return NULL;
|
||||
}
|
||||
return c;
|
||||
}
|
||||
|
||||
void printcnf(cnf* c) {
|
||||
printf("p cnf %u %u\n", c->varcnt, c->clausecnt);
|
||||
u32 pclause = 0;
|
||||
for (u32 i = 0; i < c->litcnt; ++i) {
|
||||
if (c->clauses[i] != pclause) printf("0\n");
|
||||
if (c->pars[i] == 0) printf("-");
|
||||
printf("%u ", c->variables[i] + 1);
|
||||
pclause = c->clauses[i];
|
||||
}
|
||||
printf("0\n");
|
||||
}
|
||||
|
||||
void freecnf(cnf* c) {
|
||||
free(c->pars);
|
||||
free(c->clauses);
|
||||
free(c->variables);
|
||||
free(c->lastvars);
|
||||
free(c);
|
||||
}
|
||||
|
||||
const u64 RBITS = 8;
|
||||
const u64 RSIZE = 1LU << RBITS;
|
||||
const u64 RLVLS = (63LU / RBITS) + 1;
|
||||
const u64 RMASK = RSIZE - 1;
|
||||
|
||||
// Currently sorting array of program pointers by the score of the programs
|
||||
// I need to sort by lastvar, within that sort by clause
|
||||
|
||||
void sortlastnum(cnf* c, u64 N) {
|
||||
bool v = false;
|
||||
u32* d = malloc(sizeof(u32) * c->litcnt);
|
||||
u32* d2 = malloc(sizeof(u32) * c->litcnt);
|
||||
u8* d3 = malloc(sizeof(u8) * c->litcnt);
|
||||
|
||||
u64 qb[RLVLS * RSIZE];
|
||||
u64 qe[RLVLS * RSIZE];
|
||||
memset(qb, 0, sizeof(u64) * RLVLS * RSIZE);
|
||||
memset(qe, 0, sizeof(u64) * RLVLS * RSIZE);
|
||||
|
||||
for (u64 pass = 0; pass < RLVLS; ++pass) {
|
||||
u64 shift = pass * RBITS;
|
||||
|
||||
for (u64 i = 0; i < N; ++i) {
|
||||
u32 val = UINT32_MAX - c->lastvars[(c->clausecnt - 1) - c->clauses[i]];
|
||||
u64 ind = (val >> shift) & RMASK;
|
||||
qe[pass * RSIZE + ind]++;
|
||||
}
|
||||
|
||||
u64 uc = 0;
|
||||
for (u64 i = 0; i < RSIZE; ++i) if (qe[pass * RSIZE + i]) uc++;
|
||||
if (uc == 1) continue;
|
||||
|
||||
qb[pass * RSIZE] = 0;
|
||||
for (u64 i = 1; i < RSIZE; ++i) {
|
||||
qb[pass * RSIZE + i] = qb[pass * RSIZE + i - 1] + qe[pass * RSIZE + i - 1];
|
||||
}
|
||||
|
||||
for (u64 i = 0; i < N; ++i) {
|
||||
u32 val = UINT32_MAX - c->lastvars[(c->clausecnt - 1) - c->clauses[i]];
|
||||
u64 ind = (val >> shift) & RMASK;
|
||||
d[qb[pass * RSIZE + ind]] = c->variables[i];
|
||||
d2[qb[pass * RSIZE + ind]] = c->clauses[i];
|
||||
d3[qb[pass * RSIZE + ind]] = c->pars[i];
|
||||
qb[pass * RSIZE + ind]++;
|
||||
__builtin_prefetch(d + qb[pass * RSIZE + ind] + 1);
|
||||
__builtin_prefetch(d2 + qb[pass * RSIZE + ind] + 1);
|
||||
__builtin_prefetch(d3 + qb[pass * RSIZE + ind] + 1);
|
||||
}
|
||||
|
||||
u32* tptr = c->variables;
|
||||
c->variables = d;
|
||||
d = tptr;
|
||||
tptr = c->clauses;
|
||||
c->clauses = d2;
|
||||
d2 = tptr;
|
||||
u8* tptr2 = c->pars;
|
||||
c->pars = d3;
|
||||
d3 = tptr2;
|
||||
v = !v;
|
||||
}
|
||||
|
||||
if (v) {
|
||||
u32* tptr = c->variables;
|
||||
c->variables = d;
|
||||
d = tptr;
|
||||
tptr = c->clauses;
|
||||
c->clauses = d2;
|
||||
d2 = tptr;
|
||||
u8* tptr2 = c->pars;
|
||||
c->pars = d3;
|
||||
d3 = tptr2;
|
||||
memcpy(c->variables, d, N * sizeof(u32));
|
||||
memcpy(c->clauses, d2, N * sizeof(u32));
|
||||
memcpy(c->pars, d3, N * sizeof(u8));
|
||||
}
|
||||
free(d);
|
||||
free(d2);
|
||||
free(d3);
|
||||
|
||||
u32* swaparr = malloc(sizeof(u32) * c->clausecnt);
|
||||
for (u32 i = 0; i < c->clausecnt; ++i) {
|
||||
swaparr[i] = i;
|
||||
}
|
||||
|
||||
v = false;
|
||||
d = malloc(sizeof(u32) * c->clausecnt);
|
||||
|
||||
memset(qb, 0, sizeof(u64) * RLVLS * RSIZE);
|
||||
memset(qe, 0, sizeof(u64) * RLVLS * RSIZE);
|
||||
|
||||
for (u64 pass = 0; pass < RLVLS; ++pass) {
|
||||
u64 shift = pass * RBITS;
|
||||
|
||||
for (u64 i = 0; i < c->clausecnt; ++i) {
|
||||
u32 val = UINT32_MAX - c->lastvars[swaparr[i]];
|
||||
u64 ind = (val >> shift) & RMASK;
|
||||
qe[pass * RSIZE + ind]++;
|
||||
}
|
||||
|
||||
u64 uc = 0;
|
||||
for (u64 i = 0; i < RSIZE; ++i) if (qe[pass * RSIZE + i]) uc++;
|
||||
if (uc == 1) continue;
|
||||
|
||||
qb[pass * RSIZE] = 0;
|
||||
for (u64 i = 1; i < RSIZE; ++i) {
|
||||
qb[pass * RSIZE + i] = qb[pass * RSIZE + i - 1] + qe[pass * RSIZE + i - 1];
|
||||
}
|
||||
|
||||
for (u64 i = 0; i < c->clausecnt; ++i) {
|
||||
u32 val = UINT32_MAX - c->lastvars[swaparr[i]];
|
||||
u64 ind = (val >> shift) & RMASK;
|
||||
d[qb[pass * RSIZE + ind]] = swaparr[i];
|
||||
qb[pass * RSIZE + ind]++;
|
||||
__builtin_prefetch(d + qb[pass * RSIZE + ind] + 1);
|
||||
}
|
||||
|
||||
u32* tptr = swaparr;
|
||||
swaparr = d;
|
||||
d = tptr;
|
||||
v = !v;
|
||||
}
|
||||
|
||||
if (v) {
|
||||
u32* tptr = swaparr;
|
||||
swaparr = d;
|
||||
d = tptr;
|
||||
memcpy(swaparr, d, c->clausecnt * sizeof(u32));
|
||||
}
|
||||
//free(d);
|
||||
|
||||
for (uint i = 0; i < c->clausecnt; ++i) {
|
||||
d[swaparr[i]] = i;
|
||||
}
|
||||
|
||||
for (uint i = 0; i < c->litcnt; ++i) {
|
||||
c->clauses[i] = d[c->clauses[i]];
|
||||
}
|
||||
|
||||
for (uint i = 0; i < c->clausecnt; ++i) {
|
||||
swaparr[d[i]] = c->lastvars[i];
|
||||
}
|
||||
|
||||
memcpy(c->lastvars, swaparr, sizeof(u32) * c->clausecnt);
|
||||
free(swaparr);
|
||||
free(d);
|
||||
}
|
||||
31
cnf.h
Normal file
31
cnf.h
Normal file
@@ -0,0 +1,31 @@
|
||||
#pragma once
|
||||
#include "types.h"
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#define CHECK(X, Y) if (X == NULL) { \
|
||||
printf(Y); \
|
||||
return NULL; \
|
||||
}
|
||||
|
||||
|
||||
typedef struct {
|
||||
u32 varcnt;
|
||||
u32 litcnt;
|
||||
u32 clausecnt;
|
||||
u32* lastvars;
|
||||
u32* variables;
|
||||
u32* clauses;
|
||||
u8* pars;
|
||||
} cnf;
|
||||
|
||||
cnf* readDIMACS(char* path);
|
||||
|
||||
void printcnf(cnf* c);
|
||||
|
||||
|
||||
void sortlastnum(cnf* c, u64 N);
|
||||
|
||||
|
||||
void freecnf(cnf* c);
|
||||
1
cpusolver.c
Normal file
1
cpusolver.c
Normal file
@@ -0,0 +1 @@
|
||||
#include "cpusolver.h"
|
||||
31
cpusolver.h
Normal file
31
cpusolver.h
Normal file
@@ -0,0 +1,31 @@
|
||||
#pragma once
|
||||
#include "types.h"
|
||||
|
||||
typedef struct {
|
||||
u32 varcnt;
|
||||
u32 clausecnt;
|
||||
u32* clauseinds;
|
||||
u32* clausevals;
|
||||
|
||||
u32* watchlist;
|
||||
} cpusolver;
|
||||
|
||||
void cpusolve() {
|
||||
cpusolver s;
|
||||
|
||||
|
||||
}
|
||||
/*
|
||||
* Read in DIMACs
|
||||
* A clause is a list of variables and their assignments
|
||||
*
|
||||
* Create watchlists:
|
||||
* 2 entries for each list - flag bit for if its a binary clause
|
||||
*
|
||||
* 2-watch:
|
||||
* If one of them is marked false, remove and replace
|
||||
* if there are no other literals in the clause that are true, unitprop the last literal
|
||||
* if there are no literals that evaluate to true and the other watch is false, UNSAT
|
||||
* if a literal is true, do nothing
|
||||
* if 2 literals are unassigned, clause is unsat
|
||||
*/
|
||||
403
gpusolver.c
Normal file
403
gpusolver.c
Normal file
@@ -0,0 +1,403 @@
|
||||
#include "gpusolver.h"
|
||||
#include <CL/cl.h>
|
||||
#include "time.h"
|
||||
|
||||
|
||||
static const char kernel_source[4560] = "static inline void stateaddpow(uint wcnt, uint* state, uint pow) {\n"
|
||||
" uint corpow = pow & 0b11111U;\n"
|
||||
" uint startind = pow >> 5U;\n"
|
||||
" uint tr = 1U << corpow;\n"
|
||||
" uint tval = state[startind] + tr;\n"
|
||||
" uchar choice = !((tval > state[startind]) && (tval >= tr));\n"
|
||||
" state[startind] = tval;\n"
|
||||
" for (uint i = 0; i < wcnt; ++i) {\n"
|
||||
" uchar cond = (i > startind);\n"
|
||||
" state[i] += choice * cond;\n"
|
||||
" choice = choice & (state[i] == 0) * cond + (!cond) & choice;\n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
"\n"
|
||||
"__global static uint setmax;\n"
|
||||
"\n"
|
||||
"__kernel void vectorSAT(__global const uint* cnfheader, __global const uint* lvars, __global const uint* vars, __global const uint* clauses, __global const uchar* pars, __global uint* output, __global uchar* scratchpad, __global uint* maxvals) {\n"
|
||||
" output[0] = 2;\n"
|
||||
"\n"
|
||||
" uint cnt = cnfheader[0];\n"
|
||||
" uint vcnt = cnfheader[1];\n"
|
||||
" uint ccnt = cnfheader[2];\n"
|
||||
"\n"
|
||||
" uint wcnt = 1 + (vcnt >> 5U);\n"
|
||||
"\n"
|
||||
" // Zero out the counter\n"
|
||||
" for (uint i = 0; i < wcnt; ++i) output[i + 1] = 0;\n"
|
||||
"\n"
|
||||
" uint maxctr = 1U << (vcnt & 0b11111U);\n"
|
||||
"\n"
|
||||
" uint glbid = get_global_id(0);\n"
|
||||
" uint glbsz = get_global_size(0);\n"
|
||||
"\n"
|
||||
" /*\n"
|
||||
" uint locid = get_local_id(0);\n"
|
||||
" uint locsz = get_local_size(0);\n"
|
||||
" uint grpid = get_group_id(0);\n"
|
||||
" uint grpcn = get_num_groups(0);\n"
|
||||
" */\n"
|
||||
"\n"
|
||||
" bool done = false;\n"
|
||||
" uint iter = 0;\n"
|
||||
" while (output[0] == 2) {\n"
|
||||
" // if (glbid == 0) printf(\"%s\\n\", \":~\");\n"
|
||||
" setmax = 0;\n"
|
||||
" uint maxnumx = 0;\n"
|
||||
"\n"
|
||||
" // Set all scratchpad clauses to true\n"
|
||||
" for (uint j = 0; j < ccnt; j += glbsz) {\n"
|
||||
" //uchar cond = (j + glbid) < ccnt;\n"
|
||||
" // If ptr would go past end of array, set it to last element\n"
|
||||
" // j = j * cond + (!cond) * (ccnt - glbid - 1);\n"
|
||||
" if ((j + glbid) < ccnt) scratchpad[j + glbid] = 1;\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" barrier(CLK_GLOBAL_MEM_FENCE);\n"
|
||||
"\n"
|
||||
" for (uint j = 0; j < cnt; j += glbsz) {\n"
|
||||
" // uchar cond = (j + glbid) < cnt;\n"
|
||||
" // Last element cap\n"
|
||||
" // j = j * cond + (!cond) * (cnt - glbid - 1);\n"
|
||||
" if ((j + glbid) < cnt) {\n"
|
||||
" uint varind = vars[j + glbid];\n"
|
||||
" varind = (vcnt - 1) - varind;\n"
|
||||
" uint iind = varind >> 5U;\n"
|
||||
" uint bind = varind & 0b11111U;\n"
|
||||
" uchar cpar = (output[iind + 1] >> bind) & 1U;\n"
|
||||
" if (cpar != pars[j + glbid]) scratchpad[clauses[j + glbid]] = 0;\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" barrier(CLK_GLOBAL_MEM_FENCE);\n"
|
||||
"\n"
|
||||
"\n"
|
||||
" for (uint j = 0; j < ccnt; j += glbsz) {\n"
|
||||
" if (((j + glbid) < ccnt) && (scratchpad[j + glbid] == 1)) {\n"
|
||||
" setmax = 1;\n"
|
||||
" // printf(\"%u\\n\", (~output[1]) & 0b11111);\n"
|
||||
" // printf(\"%u%u%u%u%u\\n\", (output[1] >> 4) & 1U, (output[1] >> 3) & 1U, (output[1] >> 2) & 1U, (output[1] >> 1) & 1U, output[1] & 1U);\n"
|
||||
" // printf(\"%u - %u\\n\", j + glbid, scratchpad[j + glbid]);\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" // uchar cond = (j + glbid) < cnt;\n"
|
||||
" // Last element cap\n"
|
||||
" // j = j * cond + (!cond) * (cnt - glbid - 1);\n"
|
||||
" // if (scratchpad[j + glbid] == 1) setmax = true;\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" barrier(CLK_GLOBAL_MEM_FENCE);\n"
|
||||
"\n"
|
||||
" if (setmax) {\n"
|
||||
" // Set maxval array to zero\n"
|
||||
" maxvals[glbid] = 0;\n"
|
||||
"\n"
|
||||
" barrier(CLK_GLOBAL_MEM_FENCE);\n"
|
||||
"\n"
|
||||
" // Accumulate and reduce the maximums\n"
|
||||
" for (uint j = 0; j < ccnt; j += glbsz) {\n"
|
||||
" uint a = maxvals[glbid];\n"
|
||||
" uint b = scratchpad[j + glbid] * lvars[j + glbid];\n"
|
||||
" uint c = max(a, b);\n"
|
||||
" if ((j + glbid) < ccnt) maxvals[glbid] = c;\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" barrier(CLK_GLOBAL_MEM_FENCE);\n"
|
||||
"\n"
|
||||
" // Final reduction pass\n"
|
||||
" uint maxj = maxvals[0];\n"
|
||||
" for (uint j = 1; j < glbsz; ++j) {\n"
|
||||
" maxj = max(maxj, maxvals[j]);\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" // Add to the counter\n"
|
||||
" if (glbid == 0) {\n"
|
||||
" // printf(\"> %u\\n\", maxj);\n"
|
||||
" stateaddpow(wcnt, output + 1, maxj);\n"
|
||||
" // printf(\">> %u%u%u%u%u\\n\", (output[1] >> 4) & 1U, (output[1] >> 3) & 1U, (output[1] >> 2) & 1U, (output[1] >> 1) & 1U, output[1] & 1U);\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" // Check counter for overflow\n"
|
||||
" if (output[wcnt] >= maxctr) {\n"
|
||||
" output[0] = 1;\n"
|
||||
" return;\n"
|
||||
" }\n"
|
||||
" } else {\n"
|
||||
" // SAT. Set status and assignment.\n"
|
||||
" output[0] = 0;\n"
|
||||
" if (glbid == 0) {\n"
|
||||
" for (uint i = 0; i < wcnt; ++i) output[i + 1] = ~output[i + 1];\n"
|
||||
" }\n"
|
||||
" return;\n"
|
||||
" }\n"
|
||||
" iter++;\n"
|
||||
" }\n"
|
||||
"}";
|
||||
|
||||
|
||||
static const size_t kernel_len = 4559;
|
||||
|
||||
#define GLOBAL_SIZE (256)
|
||||
#define LOCAL_SIZE (GLOBAL_SIZE)
|
||||
|
||||
#define CHECKASGN (true)
|
||||
|
||||
#define DEBUG
|
||||
|
||||
i32 gpusolve(cnf* c) {
|
||||
cl_platform_id platformid = NULL;
|
||||
cl_device_id deviceid = NULL;
|
||||
cl_uint numdevices;
|
||||
cl_uint numplatforms;
|
||||
|
||||
FILE *fp;
|
||||
char *source_str;
|
||||
size_t source_size;
|
||||
|
||||
fp = fopen("../psat.cl", "r");
|
||||
if (!fp) {
|
||||
fprintf(stderr, "Failed to load kernel.\n");
|
||||
exit(1);
|
||||
}
|
||||
source_str = (char*)malloc(0x100000);
|
||||
source_size = fread( source_str, 1, 0x100000, fp);
|
||||
fclose( fp );
|
||||
|
||||
u32 wordcnt = 1 + ((c->varcnt) >> 5U);
|
||||
|
||||
u32* solution = calloc((wordcnt + 1), sizeof(u32));
|
||||
if (solution == NULL) {
|
||||
printf("Failed to allocate solution buffer\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
cl_int res = clGetPlatformIDs(1, &platformid, &numplatforms);
|
||||
if (res != CL_SUCCESS) {
|
||||
printf("Failed to retrieve OpenCL platform IDs\n");
|
||||
exit(1);
|
||||
}
|
||||
printf("Found %u platforms\n", numplatforms);
|
||||
res = clGetDeviceIDs(platformid, CL_DEVICE_TYPE_GPU, 1, &deviceid, &numdevices);
|
||||
if (res != CL_SUCCESS) {
|
||||
printf("Failed to retrieve OpenCL device IDs\n");
|
||||
exit(1);
|
||||
}
|
||||
printf("Found %u devices\n", numdevices);
|
||||
|
||||
cl_context context = clCreateContext(NULL, 1, &deviceid, NULL, NULL, &res);
|
||||
if (res != CL_SUCCESS) {
|
||||
printf("Failed to create OpenCL context\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
cl_command_queue commqueue = clCreateCommandQueueWithProperties(context, deviceid, 0, &res);
|
||||
if (res != CL_SUCCESS) {
|
||||
printf("Failed to create OpenCL command queue\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
// Device memory buffers:
|
||||
/* For the CNF:
|
||||
* {clausecnt, literalcnt, varcnt)
|
||||
* variable array
|
||||
* clause array
|
||||
* parity array
|
||||
*
|
||||
* Other:
|
||||
* Status
|
||||
* A single counter
|
||||
*/
|
||||
|
||||
// TODO: Look into DMA, maybe? Could do clause learning CPU-side and just update the GPU buffer
|
||||
cl_mem gpuheader = clCreateBuffer(context, CL_MEM_READ_ONLY, 3 * sizeof(cl_uint), NULL, &res);
|
||||
if (res != CL_SUCCESS) {
|
||||
printf("Failed to create CNF header buffer\n");
|
||||
exit(1);
|
||||
}
|
||||
cl_mem gpulvars = clCreateBuffer(context, CL_MEM_READ_ONLY, c->clausecnt * sizeof(cl_uint), NULL, &res);
|
||||
if (res != CL_SUCCESS) {
|
||||
printf("Failed to create CNF lvar buffer\n");
|
||||
exit(1);
|
||||
}
|
||||
cl_mem gpuvariables = clCreateBuffer(context, CL_MEM_READ_ONLY, c->litcnt * sizeof(cl_uint), NULL, &res);
|
||||
if (res != CL_SUCCESS) {
|
||||
printf("Failed to create CNF variable buffer\n");
|
||||
exit(1);
|
||||
}
|
||||
cl_mem gpuclauses = clCreateBuffer(context, CL_MEM_READ_ONLY, c->litcnt * sizeof(cl_uint), NULL, &res);
|
||||
if (res != CL_SUCCESS) {
|
||||
printf("Failed to create CNF clause buffer\n");
|
||||
exit(1);
|
||||
}
|
||||
cl_mem gpuparities = clCreateBuffer(context, CL_MEM_READ_ONLY, c->litcnt * sizeof(cl_uchar), NULL, &res);
|
||||
if (res != CL_SUCCESS) {
|
||||
printf("Failed to create CNF parity buffer\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
// Allocate scratchpad memory
|
||||
cl_mem gpuscratchpad = clCreateBuffer(context, CL_MEM_READ_WRITE, c->clausecnt * sizeof(cl_uchar), NULL, &res);
|
||||
if (res != CL_SUCCESS) {
|
||||
printf("Failed to create CNF subsumption scratchpad buffer\n");
|
||||
exit(1);
|
||||
}
|
||||
/*
|
||||
cl_mem gpumaxvals = clCreateBuffer(context, CL_MEM_READ_WRITE, GLOBAL_SIZE * sizeof(cl_uint), NULL, &res);
|
||||
if (res != CL_SUCCESS) {
|
||||
printf("Failed to create CNF maxval buffer\n");
|
||||
exit(1);
|
||||
}
|
||||
*/
|
||||
|
||||
cl_mem gpuoutput = clCreateBuffer(context, CL_MEM_READ_WRITE, (wordcnt + 1) * sizeof(cl_uint), NULL, &res);
|
||||
if (res != CL_SUCCESS) {
|
||||
printf("Failed to create output buffer\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
u32 cnfheader[3] = { c->litcnt, c->varcnt, c->clausecnt };
|
||||
|
||||
// Load buffers to GPU
|
||||
res = clEnqueueWriteBuffer(commqueue, gpuheader, CL_TRUE, 0, 3 * sizeof(cl_uint), cnfheader, 0, NULL, NULL);
|
||||
if (res != CL_SUCCESS) {
|
||||
printf("Failed to queue CNF header write\n");
|
||||
exit(1);
|
||||
}
|
||||
res = clEnqueueWriteBuffer(commqueue, gpulvars, CL_TRUE, 0, c->clausecnt * sizeof(cl_uint), c->lastvars, 0, NULL, NULL);
|
||||
if (res != CL_SUCCESS) {
|
||||
printf("Failed to queue CNF lvar write\n");
|
||||
exit(1);
|
||||
}
|
||||
res = clEnqueueWriteBuffer(commqueue, gpuvariables, CL_TRUE, 0, c->litcnt * sizeof(cl_uint), c->variables, 0, NULL, NULL);
|
||||
if (res != CL_SUCCESS) {
|
||||
printf("Failed to queue CNF variable write\n");
|
||||
exit(1);
|
||||
}
|
||||
res = clEnqueueWriteBuffer(commqueue, gpuclauses, CL_TRUE, 0, c->litcnt * sizeof(cl_uint), c->clauses, 0, NULL, NULL);
|
||||
if (res != CL_SUCCESS) {
|
||||
printf("Failed to queue CNF clause write\n");
|
||||
exit(1);
|
||||
}
|
||||
res = clEnqueueWriteBuffer(commqueue, gpuparities, CL_TRUE, 0, c->litcnt * sizeof(cl_uchar), c->pars, 0, NULL, NULL);
|
||||
if (res != CL_SUCCESS) {
|
||||
printf("Failed to queue CNF parity write\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
const char* kernelptr = kernel_source;
|
||||
|
||||
cl_program satprog = clCreateProgramWithSource(context, 1, (const char**) &source_str, (const size_t*) &source_size, &res);
|
||||
if (res != CL_SUCCESS) {
|
||||
printf("Failed to create OpenCL program\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
res = clBuildProgram(satprog, 1, &deviceid, NULL, NULL, NULL);
|
||||
if (res != CL_SUCCESS) {
|
||||
char* logbuf = malloc(sizeof(char) * 65536);
|
||||
size_t loglen = 0;
|
||||
res = clGetProgramBuildInfo(satprog, deviceid, CL_PROGRAM_BUILD_LOG, sizeof(char) * 65536, logbuf, &loglen);
|
||||
if (res != CL_SUCCESS) {
|
||||
printf("Failed to retrieve build logs\n");
|
||||
exit(1);
|
||||
}
|
||||
printf("Build failed\n");
|
||||
printf("%s\n", logbuf);
|
||||
free(logbuf);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
cl_kernel kernel = clCreateKernel(satprog, "vectorSAT", &res);
|
||||
if (res != CL_SUCCESS) {
|
||||
printf("Failed to create kernel\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
res = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void*) &gpuheader);
|
||||
res = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void*) &gpulvars);
|
||||
res = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void*) &gpuvariables);
|
||||
res = clSetKernelArg(kernel, 3, sizeof(cl_mem), (void*) &gpuclauses);
|
||||
res = clSetKernelArg(kernel, 4, sizeof(cl_mem), (void*) &gpuparities);
|
||||
|
||||
res = clSetKernelArg(kernel, 5, sizeof(cl_mem), (void*) &gpuoutput);
|
||||
|
||||
res = clSetKernelArg(kernel, 6, sizeof(cl_mem), (void*) &gpuscratchpad);
|
||||
res = clSetKernelArg(kernel, 7, LOCAL_SIZE * sizeof(cl_uint), NULL);
|
||||
|
||||
|
||||
u64 starttime = utime();
|
||||
size_t itemsize[2] = {GLOBAL_SIZE, LOCAL_SIZE };
|
||||
res = clEnqueueNDRangeKernel(commqueue, kernel, 1, NULL, itemsize, itemsize + 1, 0, NULL, NULL);
|
||||
if (res != CL_SUCCESS) {
|
||||
printf("Failed to queue kernel for execution\n");
|
||||
exit(res);
|
||||
}
|
||||
|
||||
|
||||
|
||||
res = clEnqueueReadBuffer(commqueue, gpuoutput, CL_TRUE, 0, (wordcnt + 1) * sizeof(cl_uint), solution, 0, NULL, NULL);
|
||||
if (res != CL_SUCCESS) {
|
||||
printf("Failed to read kernel output\n");
|
||||
exit(1);
|
||||
}
|
||||
u64 endtime = utime();
|
||||
|
||||
if (solution[0] == 1) {
|
||||
printf("UNSAT\n");
|
||||
} else if (solution[0] == 0) {
|
||||
printf("SAT\n");
|
||||
for (u32 k = 0; k < c->varcnt; ++k) {
|
||||
u32 vind = (c->varcnt - 1) - k;
|
||||
u32 iind = vind >> 5U;
|
||||
u32 bind = vind & 0b11111U;
|
||||
u8 par = (solution[iind + 1] >> bind) & 1U;
|
||||
printf("%u", par);
|
||||
}
|
||||
printf("\n");
|
||||
if (CHECKASGN) {
|
||||
u8* assigncheck = calloc(c->clausecnt, sizeof(u8));
|
||||
for (u32 i = 0; i < c->litcnt; ++i) {
|
||||
u32 g = ((c->varcnt - 1) - c->variables[i]) >> 5U;
|
||||
u32 h = ((c->varcnt - 1) - c->variables[i]) & 0b11111U;
|
||||
u8 paract = (solution[g + 1] >> h) & 1U;
|
||||
if (c->pars[i] == paract) assigncheck[c->clauses[i]] = true;
|
||||
}
|
||||
for (u32 i = 0; i < c->clausecnt; ++i) {
|
||||
if (!assigncheck[i]) {
|
||||
printf("Failed assignment check\n");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
free(assigncheck);
|
||||
printf("Passed assignment check\n");
|
||||
}
|
||||
} else {
|
||||
printf("What the fuck???\n");
|
||||
exit(1);
|
||||
}
|
||||
printf("Actual time: %f seconds\n", ((f64) (endtime - starttime)) / 1000000.0);
|
||||
|
||||
res = clFlush(commqueue);
|
||||
res = clFinish(commqueue);
|
||||
res = clReleaseKernel(kernel);
|
||||
res = clReleaseProgram(satprog);
|
||||
res = clReleaseMemObject(gpuheader);
|
||||
res = clReleaseMemObject(gpulvars);
|
||||
res = clReleaseMemObject(gpuvariables);
|
||||
res = clReleaseMemObject(gpuclauses);
|
||||
res = clReleaseMemObject(gpuparities);
|
||||
res = clReleaseMemObject(gpuoutput);
|
||||
res = clReleaseMemObject(gpuscratchpad);
|
||||
res = clReleaseCommandQueue(commqueue);
|
||||
res = clReleaseContext(context);
|
||||
i32 retval = solution[0];
|
||||
free(solution);
|
||||
free(source_str);
|
||||
|
||||
return retval;
|
||||
}
|
||||
4
gpusolver.h
Normal file
4
gpusolver.h
Normal file
@@ -0,0 +1,4 @@
|
||||
#pragma once
|
||||
#include "cnf.h"
|
||||
|
||||
i32 gpusolve(cnf* c);
|
||||
47
main.c
Normal file
47
main.c
Normal file
@@ -0,0 +1,47 @@
|
||||
#include <stdio.h>
|
||||
#include "cnf.h"
|
||||
#include "gpusolver.h"
|
||||
#include "time.h"
|
||||
|
||||
int main() {
|
||||
/*
|
||||
srand( utime());
|
||||
u32 cnt = 4096;
|
||||
printf("p cnf %u %u\n", cnt, cnt);
|
||||
for (u32 i = 0; i < cnt; ++i) {
|
||||
printf("%s%u 0\n", (rand() & 1U) ? ("") : ("-"), i + 1);
|
||||
if ((i & 0xFF) == 0) {
|
||||
srand(utime() ^ rand());
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
*/
|
||||
/*
|
||||
for (uint i = 0; i < 1000; ++i) {
|
||||
char buf[128];
|
||||
i32 len = sprintf(buf, "/home/lev/Downloads/uf50/uf50-0%u.cnf", i + 1);
|
||||
|
||||
|
||||
|
||||
}
|
||||
*/
|
||||
cnf* c = readDIMACS("/home/lev/Downloads/uf50/uf50-088.cnf");
|
||||
// printf("%u\n", c->litcnt);
|
||||
|
||||
// for (u32 i = 0; i < c->clausecnt; ++i) printf("%u ", c->lastvars[i]);
|
||||
//printf("\ntomato\n");
|
||||
sortlastnum(c, c->litcnt);
|
||||
// printf("%u\n", c->lastvars[c->clauses[0]]);
|
||||
|
||||
//printf("%u %u %u\n\n", c->litcnt, c->varcnt, c->clausecnt);
|
||||
u64 start = utime();
|
||||
i32 res = gpusolve(c);
|
||||
u64 stop = utime();
|
||||
// printf("%s : %d\n", buf, res);
|
||||
printf("Took %f seconds\n", ((f64) (stop - start)) / 1000000.0);
|
||||
// printf("please don't\n");
|
||||
// if (res == 1) break;
|
||||
freecnf(c);
|
||||
|
||||
return 0;
|
||||
}
|
||||
151
psat.cl
Normal file
151
psat.cl
Normal file
@@ -0,0 +1,151 @@
|
||||
|
||||
#pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable
|
||||
|
||||
#define DEBUG
|
||||
|
||||
#ifdef DEBUG
|
||||
#define DBG(X) printf("%s\n", (X))
|
||||
#else
|
||||
#define DBG(X) do {} while (0)
|
||||
#endif
|
||||
|
||||
static inline void printbits(uint a) {
|
||||
for (uint i = 0; i < 32; ++i) {
|
||||
uint ind = 31 - i;
|
||||
printf("%u", (a >> ind) & 1U);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void stateaddpow(uint wcnt, uint* state, uint pow) {
|
||||
uint corpow = pow & 0b11111U;
|
||||
uint startind = pow >> 5U;
|
||||
uint tr = 1U << corpow;
|
||||
uint tval = state[startind] + tr;
|
||||
bool carry = tval < state[startind];
|
||||
state[startind] = tval;
|
||||
if (carry) {
|
||||
uint i = startind + 1;
|
||||
for ( ; i < wcnt; ++i) {
|
||||
state[i]++;
|
||||
if (state[i]) break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void vectorSAT(__global const uint* cnfheader, __global const uint* lvars, __global const uint* vars, __global const uint* clauses, __global const uchar* pars, __global uint* output, __global uchar* scratchpad, __local uint* maxvals) {
|
||||
output[0] = 2;
|
||||
|
||||
__local uint setmax;
|
||||
|
||||
uint cnt = cnfheader[0];
|
||||
uint vcnt = cnfheader[1];
|
||||
uint ccnt = cnfheader[2];
|
||||
|
||||
uint wcnt = 1 + (vcnt >> 5U);
|
||||
|
||||
uint maxctr = 1U << (vcnt & 0b11111U);
|
||||
|
||||
//uint glbid = get_global_id(0);
|
||||
//uint glbsz = get_global_size(0);
|
||||
|
||||
|
||||
uint locid = get_local_id(0);
|
||||
uint locsz = get_local_size(0);
|
||||
// uint grpid = get_group_id(0);
|
||||
// uint grpcn = get_num_groups(0);
|
||||
|
||||
// Zero out the counter
|
||||
for (uint i = 0; i < wcnt; ++i) output[i + 1] = 0;
|
||||
|
||||
// Set all scratchpad clauses to true
|
||||
for (uint j = 0; j < ccnt; j += locsz) {
|
||||
uchar cond = (j + locid) < ccnt;
|
||||
j = j * cond + (!cond) * (ccnt - locid - 1);
|
||||
scratchpad[j + locid] = 1;
|
||||
}
|
||||
|
||||
__local uint firstind[1];
|
||||
while (output[0] == 2) {
|
||||
firstind[0] = ccnt;
|
||||
|
||||
setmax = 0;
|
||||
uint maxnumx = 0;
|
||||
|
||||
for (uint j = 0; j < cnt; j += locsz) {
|
||||
|
||||
uchar cond = (j + locid) < cnt;
|
||||
// Last element cap
|
||||
j = j * cond + (!cond) * (cnt - locid - 1);
|
||||
uint varind = vars[j + locid];
|
||||
varind = (vcnt - 1) - varind;
|
||||
uint iind = varind >> 5U;
|
||||
uint bind = varind & 0b11111U;
|
||||
uchar cpar = (output[iind + 1] >> bind) & 1U;
|
||||
|
||||
if (cpar != pars[j + locid]) {
|
||||
scratchpad[clauses[j + locid]] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE);
|
||||
|
||||
|
||||
for (uint j = 0; j < ccnt; j += locsz) {
|
||||
if (scratchpad[j + locid] == 1 && (j + locid) < ccnt) {
|
||||
setmax = 1;
|
||||
}
|
||||
}
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if (setmax) {
|
||||
// Set maxval array to zero
|
||||
maxvals[locid] = 0;
|
||||
|
||||
// Accumulate and reduce the maximums
|
||||
for (uint j = 0; j < ccnt; j += locsz) {
|
||||
//uint a = maxvals[locid];
|
||||
//uint b = lvars[j + locid];
|
||||
// uint c = max(a, b);
|
||||
if ((j + locid) < ccnt && scratchpad[j + locid] == 1) {
|
||||
//maxvals[locid] = c;
|
||||
atomic_min(firstind, (j + locid));
|
||||
}
|
||||
}
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
uint maxj = lvars[firstind[0]];
|
||||
|
||||
// Set all scratchpad clauses to true
|
||||
for (uint j = 0; j < ccnt; j += locsz) {
|
||||
uchar cond = (j + locid) < ccnt;
|
||||
j = j * cond + (!cond) * (ccnt - locid - 1);
|
||||
scratchpad[j + locid] = 1;
|
||||
}
|
||||
|
||||
// Final reduction pass
|
||||
/*
|
||||
uint maxj = maxvals[0];
|
||||
for (uint j = 1; j < locsz; ++j) {
|
||||
maxj = max(maxj, maxvals[j]);
|
||||
}
|
||||
*/
|
||||
|
||||
// Add to the counter
|
||||
if (locid == 0) {
|
||||
stateaddpow(wcnt, output + 1, maxj);
|
||||
}
|
||||
|
||||
if (output[wcnt] >= maxctr) {
|
||||
output[0] = 1;
|
||||
}
|
||||
} else {
|
||||
output[0] = 0;
|
||||
if (locid == 0) {
|
||||
for (uint i = 0; i < wcnt; ++i) output[i + 1] = ~output[i + 1];
|
||||
}
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
}
|
||||
}
|
||||
35
time.c
Normal file
35
time.c
Normal file
@@ -0,0 +1,35 @@
|
||||
#include "time.h"
|
||||
#include <sys/time.h>
|
||||
#include <errno.h>
|
||||
#include <time.h>
|
||||
|
||||
struct timeval tv;
|
||||
|
||||
u64 utime() {
|
||||
gettimeofday(&tv, 0);
|
||||
return tv.tv_sec * 1000000LU + tv.tv_usec;
|
||||
}
|
||||
|
||||
u64 mtime() {
|
||||
gettimeofday(&tv, 0);
|
||||
return tv.tv_sec * 1000LU + (tv.tv_usec / 1000);
|
||||
}
|
||||
|
||||
i32 msleep(u64 msec) {
|
||||
struct timespec ts;
|
||||
i32 res;
|
||||
|
||||
if (msec < 0) {
|
||||
errno = EINVAL;
|
||||
return -1;
|
||||
}
|
||||
|
||||
ts.tv_sec = msec / 1000;
|
||||
ts.tv_nsec = (msec % 1000) * 1000000;
|
||||
|
||||
do {
|
||||
res = nanosleep(&ts, &ts);
|
||||
} while (res && errno == EINTR);
|
||||
|
||||
return res;
|
||||
}
|
||||
6
time.h
Normal file
6
time.h
Normal file
@@ -0,0 +1,6 @@
|
||||
#pragma once
|
||||
#include "types.h"
|
||||
|
||||
u64 utime();
|
||||
u64 mtime();
|
||||
i32 msleep(u64 msec);
|
||||
16
types.h
Normal file
16
types.h
Normal file
@@ -0,0 +1,16 @@
|
||||
#pragma once
|
||||
#include <stdint.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
typedef uint8_t u8;
|
||||
typedef uint16_t u16;
|
||||
typedef uint32_t u32;
|
||||
typedef uint64_t u64;
|
||||
|
||||
typedef int8_t i8;
|
||||
typedef int16_t i16;
|
||||
typedef int32_t i32;
|
||||
typedef int64_t i64;
|
||||
|
||||
typedef float f32;
|
||||
typedef double f64;
|
||||
Reference in New Issue
Block a user