Compare commits
6 Commits
feature/su
...
feature/gp
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
3f54ad3f6e | ||
|
|
53a580d0ec | ||
|
|
e2b633c802 | ||
|
|
84374e23fd | ||
|
|
4c111f4b6f | ||
|
|
a1b6cdaea9 |
@@ -2,8 +2,8 @@ cmake_minimum_required(VERSION 3.22)
|
||||
project(psat C)
|
||||
|
||||
set(CMAKE_C_STANDARD 99)
|
||||
# set(CMAKE_C_FLAGS "-O3")
|
||||
set(CMAKE_C_FLAGS "-mavx2 -O3 -ftree-loop-linear -ftree-loop-im -ftree-loop-ivcanon -fivopts -ftree-vectorize")
|
||||
|
||||
add_executable(psat main.c cnf.c cnf.h time.c time.h types.h gpusolver.c gpusolver.h cpusolver.c cpusolver.h tests/masterTest.c tests/masterTest.h)
|
||||
add_executable(psat main.c cnf.c cnf.h time.c time.h types.h gpusolver.c gpusolver.h cpusolver.c cpusolver.h tests/masterTest.c tests/masterTest.h ncnf.c ncnf.h rng.h rng.c csflocref.c csflocref.h)
|
||||
|
||||
target_link_libraries(psat -lOpenCL)
|
||||
target_link_libraries(psat -lOpenCL -lgmp)
|
||||
4
cnf.c
4
cnf.c
@@ -1,5 +1,6 @@
|
||||
#include "cnf.h"
|
||||
|
||||
/*
|
||||
cnf* readDIMACS(char* path) {
|
||||
cnf* c = malloc(sizeof(cnf));
|
||||
CHECK(c, "Failed to alloc CNF struct\n");
|
||||
@@ -324,4 +325,5 @@ void sortlastnum(cnf* c, u64 N) {
|
||||
memcpy(c->lastvars, swaparr, sizeof(u32) * c->clausecnt);
|
||||
free(swaparr);
|
||||
free(d);
|
||||
}
|
||||
}
|
||||
*/
|
||||
6
cnf.h
6
cnf.h
@@ -3,6 +3,8 @@
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
/*
|
||||
|
||||
|
||||
#define CHECK(X, Y) if (X == NULL) { \
|
||||
printf(Y); \
|
||||
@@ -20,6 +22,7 @@ typedef struct {
|
||||
u8* pars;
|
||||
} cnf;
|
||||
|
||||
|
||||
cnf* readDIMACS(char* path);
|
||||
|
||||
void printcnf(cnf* c);
|
||||
@@ -28,4 +31,5 @@ void printcnf(cnf* c);
|
||||
void sortlastnum(cnf* c, u64 N);
|
||||
|
||||
|
||||
void freecnf(cnf* c);
|
||||
void freecnf(cnf* c);
|
||||
*/
|
||||
663
cpusolver.c
663
cpusolver.c
@@ -1 +1,664 @@
|
||||
#include "cpusolver.h"
|
||||
#include "csflocref.h"
|
||||
|
||||
static inline void stateaddpow(uint wcnt, uint* state, uint pow) {
|
||||
uint corpow = pow & 0b11111U;
|
||||
uint startind = pow >> 5U;
|
||||
uint tr = 1U << corpow;
|
||||
uint tval = state[startind] + tr;
|
||||
bool carry = tval < state[startind];
|
||||
state[startind] = tval;
|
||||
if (carry) {
|
||||
uint i = startind + 1;
|
||||
for ( ; i < wcnt; ++i) {
|
||||
state[i]++;
|
||||
if (state[i]) break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#define DEBUGSAT
|
||||
|
||||
#define ADD (0)
|
||||
#define CMP (1)
|
||||
#define BCT (2)
|
||||
#define CHK (3)
|
||||
|
||||
void ctrthings3(cnf* c, u32* state, u32* ctr, const u32* max) {
|
||||
u32 varcnt = c->cnts[0];
|
||||
u32 clausecnt = c->cnts[1];
|
||||
u32 wcnt = 1U + (varcnt >> 5U);
|
||||
u32* mode = state;
|
||||
u32* index = state + 1;
|
||||
u32* addval = state + 2;
|
||||
|
||||
// printf("> %u %u %u %u %u\n", *mode, *index, *addval, *ctr, *max);
|
||||
|
||||
|
||||
u8 addmode = (*mode == ADD);
|
||||
u8 cmpmode = (*mode == CMP);
|
||||
u8 bctmode = (*mode == BCT);
|
||||
u8 submode = (*mode == CHK);
|
||||
|
||||
// 1 mask for if in subcheck mode, 1 for if not
|
||||
u32 submask = 0xFFFFFFFFU * submode;
|
||||
u32 nsubmask = ~submask;
|
||||
u32 addmask = 0xFFFFFFFFU * addmode;
|
||||
u32 naddmask = ~addmask;
|
||||
|
||||
/* subcheck */
|
||||
// used to find correct ctr index
|
||||
u32 varcor = varcnt - 1;
|
||||
// Mask to prevent oob accesses for when (varcnt / 32) > clausecnt (or smth like that)
|
||||
u32 currclause = *index & submask;
|
||||
// clausedat records are 3 words long, multiply index by 3
|
||||
u32 currclauseshifted = currclause * 3U;
|
||||
// Retrieve beginning index of clause, add current var index
|
||||
u32 currvarind = c->clausedat[currclauseshifted] + (*addval & submask);
|
||||
// Retrieve current var & parity
|
||||
u32 currvar = c->variables[currvarind];
|
||||
u8 corrpar = c->parities[currvarind];
|
||||
// Calculate index in ctr
|
||||
u32 currvarcorr = (varcor - currvar);
|
||||
u32 currvarword = currvarcorr >> 5U;
|
||||
u32 currvarbit = currvarcorr & 0b11111U;
|
||||
|
||||
/* Can we make there be 1 ctr read per iteration? */
|
||||
// If in sub mode, retrieve current var's bit, else return ctr[*index]
|
||||
u32 ctrind = (currvarword & submask) | (*index & nsubmask);
|
||||
u32 ctrval = ctr[ctrind];
|
||||
u32 maxval = max[*index & nsubmask];
|
||||
|
||||
// Extract parity bit from ctr
|
||||
u32 currpar = (ctrval >> currvarbit) & 1U;
|
||||
// Check if assignment parity matches clause parity
|
||||
u8 subvalid = currpar == corrpar;
|
||||
// Don't mask addval because it's not being used for a lookup
|
||||
u8 islvar = c->clausedat[currclauseshifted + 1U] == (*addval + 1);
|
||||
u32 jval = c->clausedat[currclauseshifted + 2U];
|
||||
u32 jword = jval >> 5U;
|
||||
u32 jbit = 1U << (jval & 0b11111U);
|
||||
|
||||
u8 islclause = (clausecnt - 1) <= *index;
|
||||
|
||||
|
||||
u8 endOfCtr = (ctrind == (wcnt - 1));
|
||||
u8 begOfCtr = (ctrind == 0);
|
||||
|
||||
/* add */
|
||||
u32 nctr = ctrval + *addval;
|
||||
u8 addoverflow = (nctr < ctrval);
|
||||
|
||||
/* cmp */
|
||||
u8 cmpresult = -(ctrval < maxval);
|
||||
cmpresult += (ctrval > maxval);
|
||||
u8 cmpnores = cmpresult == 0;
|
||||
u8 cmpisless = cmpresult == 255U;
|
||||
|
||||
/* bitcnt */
|
||||
u32 bitcntval = (ctrval & -ctrval);
|
||||
u32 bitcnt = ((bitcntval & 0x0000FFFFU) != 0) << 4U;
|
||||
bitcnt |= ((bitcntval & 0x00FF00FF) != 0) << 3U;
|
||||
bitcnt |= ((bitcntval & 0x0F0F0F0F) != 0) << 2U;
|
||||
bitcnt |= ((bitcntval & 0x33333333) != 0) << 1U;
|
||||
bitcnt |= (bitcntval & 0x55555555) != 0;
|
||||
bitcnt += bitcntval != 0;
|
||||
bitcntval = 32 - bitcnt;
|
||||
u8 ctrWordIsZero = ctrval == 0;
|
||||
|
||||
// Set current ctr word
|
||||
// If in add mode, update ctr val, otherwise leave unchanged
|
||||
ctr[ctrind] = (nctr & addmask) | (ctr[ctrind] & naddmask);
|
||||
// Set current index
|
||||
|
||||
|
||||
/*
|
||||
* jword : (subcheck & valid & lastvar)
|
||||
* index : (subcheck & valid & !lastvar) | (add & (!addoverflow | endOfCtr))
|
||||
* index + 1 : (subcheck & !valid) | (add & (addoverflow & !endOfCtr)) | (bitcnt & ctrWordIsZero & !endOfCtr)
|
||||
* index - 1 : (cmp & cmpnores & !begOfCtr)
|
||||
* 0 : (cmp & (!cmpnores | begOfCtr))
|
||||
* index[addoverflow + bitcntval] : (bitcnt & (!ctrWordIsZero | endOfCtr)
|
||||
*/
|
||||
u32 tempA = submode & (subvalid & islvar);
|
||||
// if (submode & (subvalid & islvar)) fprintf(iofile, "j: %u\n", jval);
|
||||
u32 tempB = (submode & (subvalid & !islvar));
|
||||
u32 tempC = (submode & !subvalid) | (addmode & (addoverflow & !endOfCtr)) | (bctmode & (ctrWordIsZero & !endOfCtr));
|
||||
u32 tempD = (cmpmode & (cmpnores & !begOfCtr));
|
||||
u32 tempE = (addmode & (!addoverflow | endOfCtr));
|
||||
u32 tempF = (bctmode & (!ctrWordIsZero | endOfCtr));
|
||||
tempA *= jword;
|
||||
tempB *= *index;
|
||||
tempC *= (*index + 1);
|
||||
tempD *= (*index - 1);
|
||||
tempE *= (wcnt - 1);
|
||||
u32 indexval = (*addval + bitcntval);
|
||||
u8 indexcmp = indexval < varcnt;
|
||||
indexval = indexval * indexcmp + (varcnt - 1) * (!indexcmp);
|
||||
tempF *= c->index[((indexval) * bctmode)];
|
||||
// if (bctmode & (!ctrWordIsZero | endOfCtr)) fprintf(iofile,"z: %u %u\n", (indexval), c->index[((indexval) * bctmode)]);
|
||||
*index = tempA | tempB | tempC | tempD | tempE | tempF;
|
||||
|
||||
tempA = submode & (subvalid & islvar);
|
||||
tempB = (submode & (subvalid & !islvar));
|
||||
tempC = (addmode & (addoverflow & !endOfCtr));
|
||||
tempD = (bctmode & (ctrWordIsZero & !endOfCtr));
|
||||
tempA *= (jbit);
|
||||
tempB *= (*addval + 1);
|
||||
tempC *= (addoverflow);
|
||||
tempD *= (*addval + 32);
|
||||
*addval = tempA | tempB | tempC | tempD;
|
||||
/*
|
||||
* 1 << jbit : (subcheck & valid & lastvar)
|
||||
* addval + 1 : (subcheck & valid & !lastvar)
|
||||
* 0 : (subcheck & !valid) | (add & (!addoverflow | endOfCtr)) | (cmp) | (bitcnt & (!ctrWordIsZero | endOfCtr)
|
||||
* addoverflow : (add & (addoverflow & !endOfCtr))
|
||||
* addoverflow + 32 : (bitcnt & ctrWordIsZero & !endOfCtr)
|
||||
*
|
||||
*/
|
||||
|
||||
/*
|
||||
* SAT : (subcheck & !valid & lastclause)
|
||||
* UNSAT : (cmp & (!cmpnores | begOfCtr)) & !cmpisless
|
||||
*/
|
||||
u8 issat = (submode & ((!subvalid) & islclause));
|
||||
u8 isusat = (cmpmode & ((!cmpnores) | begOfCtr)) & (!cmpisless);
|
||||
u8 isdone = issat | isusat;
|
||||
|
||||
*mode += isdone << 2U;
|
||||
*index ^= (*index * isdone);
|
||||
*index += issat;
|
||||
/*
|
||||
if (isdone) {
|
||||
*mode = 9;
|
||||
*index = issat;
|
||||
// printf("%u\n", issat);
|
||||
}
|
||||
*/
|
||||
|
||||
/*
|
||||
* ADD : (add & (!addoverflow | endOfCtr))
|
||||
* CMP : (cmp & (!cmpnores | begOfCtr))
|
||||
* BCT : (bitcnt & (!ctrWordIsZero | endOfCtr))
|
||||
* CHK : (subcheck & valid & lastvar)
|
||||
*/
|
||||
tempA = (addmode & (!addoverflow | endOfCtr));
|
||||
tempB = (cmpmode & (!cmpnores | begOfCtr));
|
||||
tempC = (bctmode & (!ctrWordIsZero | endOfCtr));
|
||||
tempD = (submode & (subvalid & islvar));
|
||||
*mode += tempA | tempB | tempC;
|
||||
*mode -= (tempD * 3);
|
||||
}
|
||||
|
||||
void ctrthings4(cnf* c, u32* state, u32* ctr, const u32* max, FILE* iofile) {
|
||||
u32 varcnt = c->cnts[0];
|
||||
u32 clausecnt = c->cnts[1];
|
||||
u32 wcnt = 1U + (varcnt >> 5U);
|
||||
u32* mode = state;
|
||||
u32* index = state + 1;
|
||||
u32* addval = state + 2;
|
||||
|
||||
// printf("> %u %u %u %u %u\n", *mode, *index, *addval, *ctr, *max);
|
||||
|
||||
|
||||
u8 addmode = (*mode == ADD);
|
||||
u8 cmpmode = (*mode == CMP);
|
||||
u8 bctmode = (*mode == BCT);
|
||||
u8 submode = (*mode == CHK);
|
||||
|
||||
// 1 mask for if in subcheck mode, 1 for if not
|
||||
u32 submask = -submode;
|
||||
u32 nsubmask = ~submask;
|
||||
u32 addmask = -addmode;
|
||||
u32 naddmask = ~addmask;
|
||||
|
||||
/* subcheck */
|
||||
// used to find correct ctr index
|
||||
u32 varcor = varcnt - 1;
|
||||
// Mask to prevent oob accesses for when (varcnt / 32) > clausecnt (or smth like that)
|
||||
u32 currclause = *index & submask;
|
||||
// clausedat records are 3 words long, multiply index by 3
|
||||
u32 currclauseshifted = currclause * 3U;
|
||||
// Retrieve beginning index of clause, add current var index
|
||||
u32 currvarind = c->clausedat[currclauseshifted] + (*addval & submask);
|
||||
// Retrieve current var & parity
|
||||
u32 currvar = c->variables[currvarind];
|
||||
u8 corrpar = c->parities[currvarind];
|
||||
// Calculate index in ctr
|
||||
u32 currvarcorr = (varcor - currvar);
|
||||
u32 currvarword = currvarcorr >> 5U;
|
||||
u32 currvarbit = currvarcorr & 0b11111U;
|
||||
|
||||
/* Can we make there be 1 ctr read per iteration? */
|
||||
// If in sub mode, retrieve current var's bit, else return ctr[*index]
|
||||
u32 ctrind = (currvarword & submask) | (*index & nsubmask);
|
||||
u32 ctrval = ctr[ctrind];
|
||||
u32 maxval = max[*index & nsubmask];
|
||||
|
||||
// Extract parity bit from ctr
|
||||
u32 currpar = (ctrval >> currvarbit) & 1U;
|
||||
// Check if assignment parity matches clause parity
|
||||
u8 subvalid = currpar == corrpar;
|
||||
// Don't mask addval because it's not being used for a lookup
|
||||
u8 islvar = c->clausedat[currclauseshifted + 1U] == (*addval + 1);
|
||||
u32 jval = c->clausedat[currclauseshifted + 2U];
|
||||
u32 jword = jval >> 5U;
|
||||
u32 jbit = 1U << (jval & 0b11111U);
|
||||
|
||||
u8 islclause = (clausecnt - 1) <= *index;
|
||||
|
||||
|
||||
u8 endOfCtr = (ctrind == (wcnt - 1));
|
||||
u8 begOfCtr = (ctrind == 0);
|
||||
|
||||
/* add */
|
||||
u32 nctr = ctrval + *addval;
|
||||
u8 addoverflow = (nctr < ctrval);
|
||||
|
||||
/* cmp */
|
||||
u8 cmpresult = -(ctrval < maxval);
|
||||
cmpresult += (ctrval > maxval);
|
||||
u8 cmpnores = cmpresult == 0;
|
||||
u8 cmpisless = cmpresult == 255U;
|
||||
|
||||
/* bitcnt */
|
||||
u32 bitcntval = (ctrval & -ctrval);
|
||||
u32 bitcnt = ((bitcntval & 0x0000FFFFU) != 0) << 4U;
|
||||
bitcnt |= ((bitcntval & 0x00FF00FF) != 0) << 3U;
|
||||
bitcnt |= ((bitcntval & 0x0F0F0F0F) != 0) << 2U;
|
||||
bitcnt |= ((bitcntval & 0x33333333) != 0) << 1U;
|
||||
bitcnt |= (bitcntval & 0x55555555) != 0;
|
||||
bitcnt += bitcntval != 0;
|
||||
bitcntval = 32 - bitcnt;
|
||||
u8 ctrWordIsZero = ctrval == 0;
|
||||
|
||||
// Set current ctr word
|
||||
// If in add mode, update ctr val, otherwise leave unchanged
|
||||
ctr[ctrind] = (nctr & addmask) | (ctr[ctrind] & naddmask);
|
||||
// Set current index
|
||||
|
||||
|
||||
/*
|
||||
* jword : (subcheck & valid & lastvar)
|
||||
* index : (subcheck & valid & !lastvar) | (add & (!addoverflow | endOfCtr))
|
||||
* index + 1 : (subcheck & !valid) | (add & (addoverflow & !endOfCtr)) | (bitcnt & ctrWordIsZero & !endOfCtr)
|
||||
* index - 1 : (cmp & cmpnores & !begOfCtr)
|
||||
* 0 : (cmp & (!cmpnores | begOfCtr))
|
||||
* index[addoverflow + bitcntval] : (bitcnt & (!ctrWordIsZero | endOfCtr)
|
||||
*/
|
||||
u32 tempA = -(submode & (subvalid & islvar));
|
||||
if (submode & (subvalid & islvar)) fprintf(iofile, "j: %u\n", jval);
|
||||
u32 tempB = -(submode & (subvalid & !islvar));
|
||||
u32 tempC = -((submode & !subvalid) | (addmode & (addoverflow & !endOfCtr)) | (bctmode & (ctrWordIsZero & !endOfCtr)));
|
||||
u32 tempD = -(cmpmode & (cmpnores & !begOfCtr));
|
||||
u32 tempE = -(addmode & (!addoverflow | endOfCtr));
|
||||
u32 tempF = -(bctmode & (!ctrWordIsZero | endOfCtr));
|
||||
tempA &= jword;
|
||||
tempB &= *index;
|
||||
tempC &= (*index + 1);
|
||||
tempD &= (*index - 1);
|
||||
tempE &= (wcnt - 1);
|
||||
u32 indexval = (*addval + bitcntval);
|
||||
u32 indexcmp = -(indexval < varcnt);
|
||||
indexval = (indexval & indexcmp) | ((varcnt - 1) & (~indexcmp));
|
||||
tempF &= c->index[((indexval) * bctmode)];
|
||||
if (bctmode & (!ctrWordIsZero | endOfCtr)) fprintf(iofile,"z: %u %u\n", (indexval), c->index[((indexval) * bctmode)]);
|
||||
*index = tempA | tempB | tempC | tempD | tempE | tempF;
|
||||
|
||||
tempA = -(submode & (subvalid & islvar));
|
||||
tempB = -(submode & (subvalid & !islvar));
|
||||
tempC = -(addmode & (addoverflow & !endOfCtr));
|
||||
tempD = -(bctmode & (ctrWordIsZero & !endOfCtr));
|
||||
tempA &= (jbit);
|
||||
tempB &= (*addval + 1);
|
||||
tempC &= (addoverflow);
|
||||
tempD &= (*addval + 32);
|
||||
*addval = tempA | tempB | tempC | tempD;
|
||||
/*
|
||||
* 1 << jbit : (subcheck & valid & lastvar)
|
||||
* addval + 1 : (subcheck & valid & !lastvar)
|
||||
* 0 : (subcheck & !valid) | (add & (!addoverflow | endOfCtr)) | (cmp) | (bitcnt & (!ctrWordIsZero | endOfCtr)
|
||||
* addoverflow : (add & (addoverflow & !endOfCtr))
|
||||
* addoverflow + 32 : (bitcnt & ctrWordIsZero & !endOfCtr)
|
||||
*
|
||||
*/
|
||||
|
||||
/*
|
||||
* SAT : (subcheck & !valid & lastclause)
|
||||
* UNSAT : (cmp & (!cmpnores | begOfCtr)) & !cmpisless
|
||||
*/
|
||||
u8 issat = (submode & ((!subvalid) & islclause));
|
||||
u8 isusat = (cmpmode & ((!cmpnores) | begOfCtr)) & (!cmpisless);
|
||||
u32 isdone = -(issat | isusat);
|
||||
|
||||
*mode |= isdone;
|
||||
*index ^= (*index & isdone);
|
||||
*index += issat;
|
||||
/*
|
||||
if (isdone) {
|
||||
*mode = 9;
|
||||
*index = issat;
|
||||
// printf("%u\n", issat);
|
||||
}
|
||||
*/
|
||||
|
||||
/*
|
||||
* ADD : (add & (!addoverflow | endOfCtr))
|
||||
* CMP : (cmp & (!cmpnores | begOfCtr))
|
||||
* BCT : (bitcnt & (!ctrWordIsZero | endOfCtr))
|
||||
* CHK : (subcheck & valid & lastvar)
|
||||
*/
|
||||
tempA = (addmode & (!addoverflow | endOfCtr));
|
||||
tempB = (cmpmode & (!cmpnores | begOfCtr));
|
||||
tempC = (bctmode & (!ctrWordIsZero | endOfCtr));
|
||||
tempD = (submode & (subvalid & islvar));
|
||||
*mode += tempA | tempB | tempC;
|
||||
*mode -= (tempD * 3);
|
||||
}
|
||||
|
||||
void ctrthings5(cnf* c, u32* state, u32* ctr, const u32* max) {
|
||||
u32 varcnt = c->cnts[0];
|
||||
u32 clausecnt = c->cnts[1];
|
||||
u32 wcnt = 1U + (varcnt >> 5U);
|
||||
u32* mode = state;
|
||||
u32* index = state + 1;
|
||||
u32* addval = state + 2;
|
||||
|
||||
// printf("> %u %u %u %u %u\n", *mode, *index, *addval, *ctr, *max);
|
||||
|
||||
|
||||
u8 addmode = (*mode == ADD);
|
||||
u8 cmpmode = (*mode == CMP);
|
||||
u8 bctmode = (*mode == BCT);
|
||||
u8 submode = (*mode == CHK);
|
||||
|
||||
// 1 mask for if in subcheck mode, 1 for if not
|
||||
u32 submask = -submode;
|
||||
u32 nsubmask = ~submask;
|
||||
u32 addmask = -addmode;
|
||||
u32 naddmask = ~addmask;
|
||||
|
||||
/* subcheck */
|
||||
// used to find correct ctr index
|
||||
u32 varcor = varcnt - 1;
|
||||
// Mask to prevent oob accesses for when (varcnt / 32) > clausecnt (or smth like that)
|
||||
u32 currclause = *index & submask;
|
||||
// clausedat records are 3 words long, multiply index by 3
|
||||
u32 currclauseshifted = currclause * 3U;
|
||||
// Retrieve beginning index of clause, add current var index
|
||||
u32 currvarind = c->clausedat[currclauseshifted] + (*addval & submask);
|
||||
// Retrieve current var & parity
|
||||
u32 currvar = c->variables[currvarind];
|
||||
u8 corrpar = c->parities[currvarind];
|
||||
// Calculate index in ctr
|
||||
u32 currvarcorr = (varcor - currvar);
|
||||
u32 currvarword = currvarcorr >> 5U;
|
||||
u32 currvarbit = currvarcorr & 0b11111U;
|
||||
|
||||
/* Can we make there be 1 ctr read per iteration? */
|
||||
// If in sub mode, retrieve current var's bit, else return ctr[*index]
|
||||
u32 ctrind = (currvarword & submask) | (*index & nsubmask);
|
||||
u32 ctrval = ctr[ctrind];
|
||||
u32 maxval = max[*index & nsubmask];
|
||||
|
||||
// Extract parity bit from ctr
|
||||
u32 currpar = (ctrval >> currvarbit) & 1U;
|
||||
// Check if assignment parity matches clause parity
|
||||
u8 subvalid = currpar == corrpar;
|
||||
// Don't mask addval because it's not being used for a lookup
|
||||
u8 islvar = c->clausedat[currclauseshifted + 1U] == (*addval + 1);
|
||||
u32 jval = c->clausedat[currclauseshifted + 2U];
|
||||
u32 jword = jval >> 5U;
|
||||
u32 jbit = 1U << (jval & 0b11111U);
|
||||
|
||||
u8 islclause = (clausecnt - 1) <= *index;
|
||||
|
||||
|
||||
u8 endOfCtr = (ctrind == (wcnt - 1));
|
||||
u8 begOfCtr = (ctrind == 0);
|
||||
|
||||
/* add */
|
||||
u32 nctr = ctrval + *addval;
|
||||
u8 addoverflow = (nctr < ctrval);
|
||||
|
||||
/* cmp */
|
||||
u8 cmpresult = -(ctrval < maxval);
|
||||
cmpresult += (ctrval > maxval);
|
||||
u8 cmpnores = cmpresult == 0;
|
||||
u8 cmpisless = cmpresult == 255U;
|
||||
|
||||
/* bitcnt */
|
||||
u32 bitcntval = (ctrval & -ctrval);
|
||||
u32 bitcnt = ((bitcntval & 0x0000FFFFU) != 0) << 4U;
|
||||
bitcnt |= ((bitcntval & 0x00FF00FF) != 0) << 3U;
|
||||
bitcnt |= ((bitcntval & 0x0F0F0F0F) != 0) << 2U;
|
||||
bitcnt |= ((bitcntval & 0x33333333) != 0) << 1U;
|
||||
bitcnt |= (bitcntval & 0x55555555) != 0;
|
||||
bitcnt += bitcntval != 0;
|
||||
bitcntval = 32 - bitcnt;
|
||||
u8 ctrWordIsZero = ctrval == 0;
|
||||
|
||||
// Set current ctr word
|
||||
// If in add mode, update ctr val, otherwise leave unchanged
|
||||
ctr[ctrind] = (nctr & addmask) | (ctr[ctrind] & naddmask);
|
||||
// Set current index
|
||||
|
||||
u32 tempA = -(submode & (subvalid & islvar));
|
||||
u32 tempB = -(submode & (subvalid & !islvar));
|
||||
u32 tempC = -((submode & !subvalid) | (addmode & (addoverflow & !endOfCtr)) | (bctmode & (ctrWordIsZero & !endOfCtr)));
|
||||
u32 tempD = -(cmpmode & (cmpnores & !begOfCtr));
|
||||
u32 tempE = -(addmode & (!addoverflow | endOfCtr));
|
||||
u32 tempF = -(bctmode & (!ctrWordIsZero | endOfCtr));
|
||||
tempA &= jword;
|
||||
tempB &= *index;
|
||||
tempC &= (*index + 1);
|
||||
tempD &= (*index - 1);
|
||||
tempE &= (wcnt - 1);
|
||||
u32 indexval = (*addval + bitcntval);
|
||||
u32 indexcmp = -(indexval < varcnt);
|
||||
indexval = (indexval & indexcmp) | ((varcnt - 1) & (~indexcmp));
|
||||
tempF &= c->index[((indexval) * bctmode)];
|
||||
*index = tempA | tempB | tempC | tempD | tempE | tempF;
|
||||
|
||||
tempA = -(submode & (subvalid & islvar));
|
||||
tempB = -(submode & (subvalid & !islvar));
|
||||
tempC = -(addmode & (addoverflow & !endOfCtr));
|
||||
tempD = -(bctmode & (ctrWordIsZero & !endOfCtr));
|
||||
tempA &= (jbit);
|
||||
tempB &= (*addval + 1);
|
||||
tempC &= (addoverflow);
|
||||
tempD &= (*addval + 32);
|
||||
*addval = tempA | tempB | tempC | tempD;
|
||||
|
||||
/* completion detection */
|
||||
u8 issat = (submode & ((!subvalid) & islclause));
|
||||
u8 isusat = (cmpmode & ((!cmpnores) | begOfCtr)) & (!cmpisless);
|
||||
u32 isdone = -(issat | isusat);
|
||||
|
||||
/* exit */
|
||||
*mode |= isdone;
|
||||
*index ^= (*index & isdone);
|
||||
*index += issat;
|
||||
|
||||
tempA = (addmode & (!addoverflow | endOfCtr));
|
||||
tempB = (cmpmode & (!cmpnores | begOfCtr));
|
||||
tempC = (bctmode & (!ctrWordIsZero | endOfCtr));
|
||||
tempD = (submode & (subvalid & islvar));
|
||||
*mode += tempA | tempB | tempC;
|
||||
*mode -= (tempD * 3);
|
||||
}
|
||||
/** NOTES:
|
||||
* First thing's first is indexing, let's get started with that.
|
||||
*
|
||||
*
|
||||
*
|
||||
*
|
||||
*
|
||||
*/
|
||||
|
||||
void ctrthings2(cnf* c, u32* state, u32* ctr, const u32* max, FILE* iofile) {
|
||||
// Pre-actual work setup stuff
|
||||
u32 wcnt = 1U + (c->cnts[0] >> 5U); // number of words we need (bitcnt + 1 ceiling divided by 32)
|
||||
u32* mode = state; // Make pointers to array values for convenience so I don't need to refer to everything via array indices
|
||||
u32* index = state + 1; // tbh the names aren't much better
|
||||
u32* addval = state + 2;
|
||||
|
||||
u32 varcnt = c->cnts[0] - 1; // (varcnt - 1) - v gives us the bit index of variable v in the ctr
|
||||
u32 chkmsk = 0xFFFFFFFFU * (*mode == (2)); // Create a mask that zeros anything if we aren't in subsumption-check mode, to prevent out of bounds accesses
|
||||
u32 chkcls = *index & chkmsk; // Use said mask to zero the index if we're not in check mode
|
||||
u32 chkind = c->clausedat[3 * chkcls] + (*addval & chkmsk); // potentially zeroed index to lookup clause start, add potentially zeroed variable index in that clause
|
||||
u32 var = c->variables[chkind]; // Retrieve variable at that index
|
||||
u8 par = c->parities[chkind]; // Retrieve the variable's CNF parity
|
||||
u32 vword = (varcnt - var) >> 5U; // As mentioned in the varcnt line, compute word index of var in ctr
|
||||
u32 vbit = (varcnt - var) & 0b11111U; // Compute bit index of var in above ctr word
|
||||
u8 corpar = (ctr[vword] >> vbit) & 1U; // Extract correct parity
|
||||
u8 isvalid = (par == corpar); // Check if CNF parity equals assignment parity from ctr
|
||||
u8 islvar = ((*addval + 1) == c->clausedat[3 * chkcls + 1]); // Check if this is the last variable of the clause
|
||||
u8 isbchk0 = (*mode == (2)); // If in check mode
|
||||
u8 isbchk1 = isbchk0 & isvalid; // If in check mode & valid
|
||||
u8 isbchk2 = isbchk1 & islvar; // If in check mode, valid, and on the last var
|
||||
u32 j = c->clausedat[3 * chkcls + 2]; // retrieve j val for current clause
|
||||
*mode -= 2 * isbchk2; // If isbchk2, move to add mode
|
||||
#ifdef DEBUGSAT
|
||||
if (isbchk2) fprintf(iofile,"j: %u\n", j);
|
||||
#endif
|
||||
*index = (j >> 5U) * isbchk2 + *index * (!isbchk2); // if isbchk2, set index for the jval to be added
|
||||
*addval = (1U << (j & 0b11111U)) * isbchk2 + *addval * (!isbchk2); // if isbchk, set addval for the jval to be added;
|
||||
*addval += ((isbchk1) & (!islvar)); // If in check mode, valid, and not on the last var, move to the next var
|
||||
u8 isbchk3 = (isbchk0 & (!isvalid)); // if in check mode and not valid, move to the next clause
|
||||
*addval *= (!isbchk3); // if moving to next clause, go to 0th var of that clause
|
||||
*index += (isbchk3); // actually moving to the next clause
|
||||
u8 issat = (*index == c->cnts[1]) * (isbchk3); // if we just passed the last clause, none were valid, so our assignment is SAT
|
||||
|
||||
u32 cmpaddind = *index * (*mode != (2)); // Zero index into ctr if in check mode, to prevent out of bounds accesses
|
||||
u32 nval = ctr[cmpaddind] + *addval; // Find the result of the current step if it was addition
|
||||
*addval = (nval < ctr[cmpaddind]) * (*mode == (0)) + (*addval) * (*mode == (2)); // If in add mode, set addval to carry. If in cmp mode, set to 0. If in check mode, leave alone.
|
||||
ctr[cmpaddind] = nval * ((*mode == (0)) & !issat) + ctr[cmpaddind] * ((*mode != (0)) | issat); // If in add mode, set new ctr val, otherwise leave unchanged
|
||||
*addval -= (ctr[cmpaddind] < max[cmpaddind]) * (*mode == (1)); // If in comparison mode, decrement addval if less than
|
||||
*addval += (ctr[cmpaddind] > max[cmpaddind]) * (*mode == (1)); // If in comparison mode, increment addval if greater than
|
||||
u8 addcond = (*addval == 0) | (cmpaddind == (wcnt - 1)); // Exit condition for the ADD state: If addval is zero (no carry) or we're at the last word
|
||||
u8 cmpcond = (*addval != 0) | (cmpaddind == 0); // Exit condition for the CMP state: if addval is nonzero (lt or gt) or we're at the least significant word
|
||||
u8 exittime = (*mode == (1)) & cmpcond & (*addval != -1);
|
||||
exittime |= issat;
|
||||
if (exittime) { // If in cmpmode and the comparison result is not less than, unsat
|
||||
printf("%u\n", issat);
|
||||
*mode = 3;
|
||||
return;
|
||||
}
|
||||
u8 cmpdone = cmpcond & (*mode == (1)); // if comparison completion conditions are satisfied and in CMP mode
|
||||
u32 addindex = (cmpaddind + 1) * !addcond + (cmpaddind) * addcond; // if add completion is satisfied, set index to most significant word, else increment by 1
|
||||
*index = addindex * (*mode == (0)) + (*index - (*mode == (1))) * (*mode != (0)); // If in add mode, use addindex; if in cmp mode, decrement index by 1
|
||||
*index *= !cmpdone;
|
||||
*addval *= !(((addcond) & (*mode == (0))) | cmpdone); // If add is complete, or cmp is complete, zero. Else leave unchanged.
|
||||
*mode += addcond * (*mode == (0)) + cmpdone; // If in add mode and add completion is reached, increment mode. If in cmp mode and cmp completion reached, increment mode.
|
||||
}
|
||||
|
||||
i32 cpusolve(cnf* c) {
|
||||
u32 state[3];
|
||||
u32 wcnt = 1 + (c->cnts[0] >> 5U);
|
||||
|
||||
u32* ctr = calloc((wcnt), sizeof(u32));
|
||||
if (ctr == NULL) {
|
||||
printf("Failed to allocate solution buffer\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
u32* max = calloc((wcnt), sizeof(u32));
|
||||
if (max == NULL) {
|
||||
printf("Failed to allocate solution buffer\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
stateaddpow(wcnt, max, c->cnts[0]);
|
||||
|
||||
state[0] = 2;
|
||||
state[1] = state[2] = 0;
|
||||
|
||||
while (state[0] < 4) {
|
||||
ctrthings5(c, state, ctr, max);
|
||||
}
|
||||
|
||||
free(ctr);
|
||||
free(max);
|
||||
return state[1];
|
||||
}
|
||||
|
||||
i32 compareFiles(FILE *file1, FILE *file2){
|
||||
char ch1 = getc(file1);
|
||||
char ch2 = getc(file2);
|
||||
int error = 0, pos = 0, line = 1;
|
||||
while (ch1 != EOF && ch2 != EOF){
|
||||
pos++;
|
||||
if (ch1 == '\n' && ch2 == '\n'){
|
||||
line++;
|
||||
pos = 0;
|
||||
}
|
||||
if (ch1 != ch2){
|
||||
error++;
|
||||
printf("Line Number : %d \tError"
|
||||
" Position : %d \n", line, pos);
|
||||
}
|
||||
ch1 = getc(file1);
|
||||
ch2 = getc(file2);
|
||||
}
|
||||
if (ch1 != EOF || ch2 != EOF) error += 1;
|
||||
printf("Total Errors : %d\t\n", error);
|
||||
return error;
|
||||
}
|
||||
|
||||
void debugsolve(char* path) {
|
||||
|
||||
FILE* reffile = fopen("temp.txt", "w");
|
||||
FILE* cmpfile = fopen("temp2.txt", "w");
|
||||
|
||||
runcsfloc(path, reffile);
|
||||
|
||||
cnf* c = readDIMACS(path);
|
||||
sortlastnum(c);
|
||||
|
||||
|
||||
u32* state = malloc(sizeof(u32) * 3);
|
||||
u32 wcnt = 1 + (c->cnts[0] >> 5U);
|
||||
|
||||
u32* ctr = calloc((wcnt), sizeof(u32));
|
||||
if (ctr == NULL) {
|
||||
printf("Failed to allocate solution buffer\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
u32* max = calloc((wcnt), sizeof(u32));
|
||||
if (max == NULL) {
|
||||
printf("Failed to allocate solution buffer\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
stateaddpow(wcnt, max, c->cnts[0]);
|
||||
|
||||
state[0] = 2;
|
||||
state[1] = state[2] = 0;
|
||||
|
||||
while (state[0] < 4) {
|
||||
ctrthings4(c, state, ctr, max, cmpfile);
|
||||
}
|
||||
|
||||
free(ctr);
|
||||
free(max);
|
||||
|
||||
freecnf(c);
|
||||
|
||||
fclose(reffile);
|
||||
fclose(cmpfile);
|
||||
|
||||
reffile = fopen("temp.txt", "r");
|
||||
cmpfile = fopen("temp2.txt", "r");
|
||||
|
||||
i32 res = compareFiles(reffile, cmpfile);
|
||||
if (res != 0) {
|
||||
printf("%s\n", path);
|
||||
exit(1);
|
||||
}
|
||||
fclose(reffile);
|
||||
fclose(cmpfile);
|
||||
free(state);
|
||||
|
||||
}
|
||||
32
cpusolver.h
32
cpusolver.h
@@ -1,31 +1,9 @@
|
||||
#pragma once
|
||||
#include "types.h"
|
||||
|
||||
typedef struct {
|
||||
u32 varcnt;
|
||||
u32 clausecnt;
|
||||
u32* clauseinds;
|
||||
u32* clausevals;
|
||||
|
||||
u32* watchlist;
|
||||
} cpusolver;
|
||||
|
||||
void cpusolve() {
|
||||
cpusolver s;
|
||||
#include "ncnf.h"
|
||||
|
||||
|
||||
}
|
||||
/*
|
||||
* Read in DIMACs
|
||||
* A clause is a list of variables and their assignments
|
||||
*
|
||||
* Create watchlists:
|
||||
* 2 entries for each list - flag bit for if its a binary clause
|
||||
*
|
||||
* 2-watch:
|
||||
* If one of them is marked false, remove and replace
|
||||
* if there are no other literals in the clause that are true, unitprop the last literal
|
||||
* if there are no literals that evaluate to true and the other watch is false, UNSAT
|
||||
* if a literal is true, do nothing
|
||||
* if 2 literals are unassigned, clause is unsat
|
||||
*/
|
||||
|
||||
i32 cpusolve(cnf* c);
|
||||
|
||||
void debugsolve(char* path);
|
||||
630
csflocref.c
Normal file
630
csflocref.c
Normal file
@@ -0,0 +1,630 @@
|
||||
#include "csflocref.h"
|
||||
/*
|
||||
============================================================================
|
||||
Name : csfloc.c
|
||||
Author : Gábor Kusper
|
||||
Version : 1.0
|
||||
Copyright : Gábor Kusper
|
||||
Description : CSFLOC in C, Ansi-style
|
||||
============================================================================
|
||||
*/
|
||||
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <time.h>
|
||||
|
||||
|
||||
char* fileName;
|
||||
int doSorting = 1;
|
||||
int doIndexing = 1; // can be true only if doSorting is also true
|
||||
int addLongTailClauses = 0; // can be greater than 0 only if doIndexing is true
|
||||
int howManyClausesAddMaximumPerClauses = 0;
|
||||
|
||||
unsigned int MAXINTINDEX;
|
||||
unsigned int MAXBITINDEX;
|
||||
unsigned int MAXVALUE;
|
||||
|
||||
unsigned int* counter;
|
||||
unsigned int* clauses;
|
||||
|
||||
int* numxsAtEnd;
|
||||
int* indexClausesByNumxsAtEnd;
|
||||
|
||||
int numOfClauses;
|
||||
int numOfVariables;
|
||||
|
||||
int lentghOfBitArray;
|
||||
|
||||
|
||||
void printBitArray(unsigned int* toPrint)
|
||||
{
|
||||
int i;
|
||||
for(i = numOfVariables-1; i>=0; i--)
|
||||
{
|
||||
int intIndex = i / (sizeof(int)*8);
|
||||
int bitIndex = i % (sizeof(int)*8);
|
||||
int mask = 1 << bitIndex;
|
||||
if((toPrint[intIndex] & mask) > 0) printf("%d ", numOfVariables-i);
|
||||
else printf("-%d ", numOfVariables-i);
|
||||
}
|
||||
printf("0 \n");
|
||||
}
|
||||
|
||||
void printClause(int clauseIndex)
|
||||
{
|
||||
printf("clauseIndex:%d\n",clauseIndex);
|
||||
printf("numxsAtEnd[clauseIndex]:%d\n",numxsAtEnd[clauseIndex]);
|
||||
int index = clauseIndex * lentghOfBitArray * 2;
|
||||
int i;
|
||||
for(i = numOfVariables-1; i>=0; i--)
|
||||
{
|
||||
int intIndex = i / (sizeof(int)*8);
|
||||
int bitIndex = i % (sizeof(int)*8);
|
||||
int mask = 1 << bitIndex;
|
||||
if((clauses[index + intIndex*2] & mask) > 0) printf("%d ", numOfVariables-i);
|
||||
else printf("-%d ", numOfVariables-i);
|
||||
}
|
||||
printf("0 \n");
|
||||
for(i = numOfVariables-1; i>=0; i--)
|
||||
{
|
||||
int intIndex = i / (sizeof(int)*8);
|
||||
int bitIndex = i % (sizeof(int)*8);
|
||||
int mask = 1 << bitIndex;
|
||||
if((clauses[index + intIndex*2 + 1] & mask) > 0) printf("%d ", numOfVariables-i);
|
||||
else printf("-%d ", numOfVariables-i);
|
||||
}
|
||||
printf("0 \n");
|
||||
}
|
||||
|
||||
|
||||
void printClauses()
|
||||
{
|
||||
int clauseIndex;
|
||||
for(clauseIndex=0; clauseIndex < numOfClauses; clauseIndex++)
|
||||
{
|
||||
printClause(clauseIndex);
|
||||
}
|
||||
}
|
||||
|
||||
// coin: http://graphics.stanford.edu/~seander/bithacks.html#ZerosOnRightBinSearch
|
||||
int countZeroBitsOnTheRight(unsigned int v)
|
||||
{
|
||||
int c;
|
||||
// NOTE: if 0 == v, then c = 31.
|
||||
if (v & 0x1)
|
||||
{
|
||||
// special case for odd v (assumed to happen half of the time)
|
||||
c = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
c = 1;
|
||||
if ((v & 0xffff) == 0)
|
||||
{
|
||||
v >>= 16;
|
||||
c += 16;
|
||||
}
|
||||
if ((v & 0xff) == 0)
|
||||
{
|
||||
v >>= 8;
|
||||
c += 8;
|
||||
}
|
||||
if ((v & 0xf) == 0)
|
||||
{
|
||||
v >>= 4;
|
||||
c += 4;
|
||||
}
|
||||
if ((v & 0x3) == 0)
|
||||
{
|
||||
v >>= 2;
|
||||
c += 2;
|
||||
}
|
||||
c -= v & 0x1;
|
||||
}
|
||||
return c;
|
||||
}
|
||||
|
||||
|
||||
int countZeroBitsOnTheRightInBitArray(int clauseIndex)
|
||||
{
|
||||
int countZeroBits = 0;
|
||||
int index = clauseIndex * lentghOfBitArray * 2;
|
||||
int intIndex;
|
||||
for(intIndex=0; intIndex<lentghOfBitArray; intIndex++)
|
||||
{
|
||||
if(clauses[index + intIndex*2]!=0) break;
|
||||
countZeroBits += sizeof(unsigned int)*8;
|
||||
}
|
||||
if (intIndex == lentghOfBitArray) return numOfVariables-1;
|
||||
countZeroBits += countZeroBitsOnTheRight(clauses[index + intIndex*2]);
|
||||
return countZeroBits;
|
||||
}
|
||||
|
||||
int countZeroBitsOnTheRightInBitArrayOldOne(unsigned int* bits)
|
||||
{
|
||||
int countZeroBits = 0;
|
||||
int intIndex ;
|
||||
for(intIndex=0; intIndex<lentghOfBitArray; intIndex++)
|
||||
{
|
||||
if(bits[intIndex]!=0) break;
|
||||
countZeroBits += sizeof(unsigned int)*8;
|
||||
}
|
||||
if (intIndex == lentghOfBitArray) return numOfVariables-1;
|
||||
countZeroBits += countZeroBitsOnTheRight(bits[intIndex]);
|
||||
return countZeroBits;
|
||||
}
|
||||
|
||||
|
||||
void resolution(int clauseIndexC, int clauseIndexA, int clauseIndexB, int intIndex, int bitIndex)
|
||||
{
|
||||
|
||||
//printf("Start of Resolution\n");
|
||||
//printf("intIndex: %d\n", intIndex);
|
||||
//printf("bitIndex: %d\n", bitIndex);
|
||||
//printClause(clauseIndexA);
|
||||
//printClause(clauseIndexB);
|
||||
|
||||
numxsAtEnd[clauseIndexC] = -1;
|
||||
int indexA = clauseIndexA * lentghOfBitArray * 2;
|
||||
int indexB = clauseIndexB * lentghOfBitArray * 2;
|
||||
if ((clauses[indexA + intIndex*2 +1] & 1 << bitIndex) == (clauses[indexB + intIndex*2 +1] & 1 << bitIndex)) return;
|
||||
int i;
|
||||
for(i=0; i<lentghOfBitArray; i++)
|
||||
{
|
||||
unsigned int tesztA, tesztB, tesztMask;
|
||||
tesztMask = clauses[indexA + i*2] & clauses[indexB + i*2];
|
||||
tesztA = clauses[indexA + i*2 + 1] & tesztMask;
|
||||
tesztB = clauses[indexB + i*2 + 1] & tesztMask;
|
||||
if (i==intIndex) { tesztA |= 1<<bitIndex; tesztB |= 1<<bitIndex; }
|
||||
if (tesztA != tesztB) return;
|
||||
}
|
||||
|
||||
int indexC = clauseIndexC * lentghOfBitArray * 2;
|
||||
for(i=0; i<lentghOfBitArray; i++)
|
||||
{
|
||||
clauses[indexC + i*2] = clauses[indexA + i*2] | clauses[indexB + i*2];
|
||||
clauses[indexC + i*2 + 1] = clauses[indexA + i*2 + 1] | clauses[indexB + i*2 + 1];
|
||||
}
|
||||
clauses[indexC + intIndex*2] &= ~(1<<bitIndex);
|
||||
clauses[indexC + intIndex*2 + 1] &= ~(1<<bitIndex);
|
||||
numxsAtEnd[clauseIndexC] = countZeroBitsOnTheRightInBitArray(clauseIndexC);
|
||||
|
||||
//printf("End of Resolution\n");
|
||||
//printClause(clauseIndexA);
|
||||
//printClause(clauseIndexB);
|
||||
//printClause(clauseIndexC);
|
||||
}
|
||||
|
||||
void readDIMACScsfloc(char* fileName) {
|
||||
char c[1000];
|
||||
FILE *fptr;
|
||||
if ((fptr=fopen(fileName,"r"))==NULL)
|
||||
{
|
||||
printf("Error! opening file");
|
||||
exit(1);
|
||||
}
|
||||
int clauseIndex = 0;
|
||||
while (fgets(c,1000, fptr)!=NULL) {
|
||||
if (c[0]=='c' || c[0]=='%' || c[0]=='0' || c[0]=='\n') continue;
|
||||
if (c[0]=='p'){
|
||||
char* token = strtok(c, " ");
|
||||
token = strtok(NULL, " ");
|
||||
token = strtok(NULL, " ");
|
||||
numOfVariables = atoi(token);
|
||||
MAXINTINDEX = numOfVariables/(sizeof(int)*8);
|
||||
MAXBITINDEX = numOfVariables%(sizeof(int)*8);
|
||||
MAXVALUE = 1 << MAXBITINDEX;
|
||||
lentghOfBitArray = numOfVariables/(sizeof(int)*8)+1;
|
||||
token = strtok(NULL, " ");
|
||||
numOfClauses = atoi(token);
|
||||
numxsAtEnd = malloc(sizeof(int) * (numOfClauses+addLongTailClauses));
|
||||
clauses = malloc(sizeof(int) * (numOfClauses+addLongTailClauses) * lentghOfBitArray * 2);
|
||||
indexClausesByNumxsAtEnd = malloc(sizeof(int) * numOfVariables);
|
||||
int i;
|
||||
for(i=0; i<numOfVariables; i++)
|
||||
{
|
||||
indexClausesByNumxsAtEnd[i] = 0;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
int index = clauseIndex * lentghOfBitArray * 2;
|
||||
int i;
|
||||
for(i=0; i<lentghOfBitArray; i++)
|
||||
{
|
||||
clauses[index + i*2] = 0;
|
||||
clauses[index + i*2 +1] = 0;
|
||||
}
|
||||
|
||||
int maxLiteral = 0;
|
||||
char* token = strtok(c, " ");
|
||||
while (token) {
|
||||
int literal = atoi(token);
|
||||
token = strtok(NULL, " ");
|
||||
if (literal == 0) continue;
|
||||
|
||||
int literalIndex = numOfVariables - abs(literal);
|
||||
int intIndex = literalIndex / (sizeof(int)*8);
|
||||
int bitIndex = literalIndex % (sizeof(int)*8);
|
||||
|
||||
unsigned int setMask = 1 << bitIndex;
|
||||
|
||||
clauses[index + intIndex * 2] |= setMask;
|
||||
if (literal > 0) clauses[index + intIndex * 2 +1] |= setMask;
|
||||
if (abs(literal) > maxLiteral) maxLiteral = abs(literal);
|
||||
}
|
||||
numxsAtEnd[clauseIndex] = numOfVariables - maxLiteral;
|
||||
clauseIndex++;
|
||||
if (clauseIndex == numOfClauses) break;
|
||||
}
|
||||
fclose(fptr);
|
||||
|
||||
/*
|
||||
for (uint i = 0; i < numOfClauses; ++i) {
|
||||
printf("%u ", numxsAtEnd[i]);
|
||||
}
|
||||
printf("\n");
|
||||
printf("numxsAtEnd\n");
|
||||
*/
|
||||
}
|
||||
|
||||
void sortClausesByNumxsAtEnd()
|
||||
{
|
||||
//printf("Before sorting:\n");
|
||||
//printClauses();
|
||||
int i;
|
||||
for(i=1; i<numOfClauses; i++)
|
||||
{
|
||||
int clauseIndex;
|
||||
for(clauseIndex=0; clauseIndex<numOfClauses-i; clauseIndex++)
|
||||
{
|
||||
if (numxsAtEnd[clauseIndex]<numxsAtEnd[clauseIndex+1])
|
||||
{
|
||||
unsigned int temp = numxsAtEnd[clauseIndex];
|
||||
numxsAtEnd[clauseIndex] = numxsAtEnd[clauseIndex+1];
|
||||
numxsAtEnd[clauseIndex+1] = temp;
|
||||
|
||||
int indexA = clauseIndex * lentghOfBitArray * 2;
|
||||
int indexB = (clauseIndex+1) * lentghOfBitArray * 2;
|
||||
int relIndex;
|
||||
for(relIndex=0; relIndex<lentghOfBitArray*2; relIndex++)
|
||||
{
|
||||
temp = clauses[indexA+relIndex];
|
||||
clauses[indexA+relIndex] = clauses[indexB+relIndex];
|
||||
clauses[indexB+relIndex] = temp;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
//printf("After sorting:\n");
|
||||
//printClauses();
|
||||
}
|
||||
|
||||
void createIndexClausesByNumxsAtEnd()
|
||||
{
|
||||
int i;
|
||||
int lastValue = 0;
|
||||
for(i=numOfClauses-1; i>=0; i--)
|
||||
{
|
||||
if (numxsAtEnd[i] > lastValue)
|
||||
{
|
||||
while(lastValue != numxsAtEnd[i])
|
||||
{
|
||||
indexClausesByNumxsAtEnd[lastValue] = i+1;
|
||||
lastValue++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// it might be, that we over index if there is no clause with numxAtEnd == 0
|
||||
// this part corrects this if needed
|
||||
int correctioningIndex = 0;
|
||||
while (indexClausesByNumxsAtEnd[correctioningIndex] == numOfClauses)
|
||||
{
|
||||
correctioningIndex++;
|
||||
}
|
||||
if (correctioningIndex!=0)
|
||||
{
|
||||
int goodValue = indexClausesByNumxsAtEnd[correctioningIndex];
|
||||
do
|
||||
{
|
||||
correctioningIndex--;
|
||||
indexClausesByNumxsAtEnd[correctioningIndex] = goodValue;
|
||||
}
|
||||
while(correctioningIndex!=0);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void addClausesByResolution()
|
||||
{
|
||||
int numberOfNewClauses = 0;
|
||||
int numOfZerosAtEnd = 0;
|
||||
while (numberOfNewClauses<addLongTailClauses) // While adding clauses
|
||||
{
|
||||
int clauseIndexA= numOfClauses-1;
|
||||
if(numOfZerosAtEnd!=0)
|
||||
{
|
||||
clauseIndexA = indexClausesByNumxsAtEnd[numOfZerosAtEnd-1]-1;
|
||||
}
|
||||
for( ; clauseIndexA>=indexClausesByNumxsAtEnd[numOfZerosAtEnd]; clauseIndexA--)
|
||||
{
|
||||
int clauseIndexB;
|
||||
int newClausesPerClause = 0;
|
||||
for(clauseIndexB=clauseIndexA-1; clauseIndexB>=indexClausesByNumxsAtEnd[numOfZerosAtEnd]; clauseIndexB--)
|
||||
{
|
||||
resolution(numOfClauses+numberOfNewClauses, clauseIndexA, clauseIndexB,
|
||||
numOfZerosAtEnd/(sizeof(unsigned int)*8),
|
||||
numOfZerosAtEnd%(sizeof(unsigned int)*8));
|
||||
if (numxsAtEnd[numOfClauses+numberOfNewClauses] == -1) continue;
|
||||
|
||||
//printf("numOfZerosAtEnd: %d\n", numOfZerosAtEnd);
|
||||
//printf("numberOfNewClauses: %d\n", numberOfNewClauses);
|
||||
//printf("numxsAtEnd[numOfClauses+numberOfNewClauses]: %d\n", numxsAtEnd[numOfClauses+numberOfNewClauses]);
|
||||
|
||||
numberOfNewClauses++;
|
||||
newClausesPerClause++;
|
||||
|
||||
if (newClausesPerClause==howManyClausesAddMaximumPerClauses) break;
|
||||
if (numberOfNewClauses==addLongTailClauses) break;
|
||||
}
|
||||
if (numberOfNewClauses==addLongTailClauses) break;
|
||||
|
||||
|
||||
// +1 m?lys?g eleje
|
||||
int numOfZerosAtEnd2 = numxsAtEnd[numOfClauses+numberOfNewClauses-1];
|
||||
int clauseIndexC = numOfClauses+numberOfNewClauses-1;
|
||||
int clauseIndexD;
|
||||
for(clauseIndexD=indexClausesByNumxsAtEnd[numOfZerosAtEnd2-1]-1;
|
||||
clauseIndexD>=indexClausesByNumxsAtEnd[numOfZerosAtEnd2]; clauseIndexD--)
|
||||
{
|
||||
|
||||
//printf("before resolution in +1 depth\n");
|
||||
//printf("numxsAtEnd[clauseIndexC]: %d\n", numxsAtEnd[clauseIndexC]);
|
||||
//printf("numxsAtEnd[clauseIndexD]: %d\n", numxsAtEnd[clauseIndexD]);
|
||||
|
||||
|
||||
resolution(numOfClauses+numberOfNewClauses, clauseIndexC, clauseIndexD,
|
||||
numOfZerosAtEnd2/(sizeof(unsigned int)*8),
|
||||
numOfZerosAtEnd2%(sizeof(unsigned int)*8));
|
||||
if (numxsAtEnd[numOfClauses+numberOfNewClauses] == -1) continue;
|
||||
|
||||
//printf("after resolution in +1 depth\n");
|
||||
//printf("numOfZerosAtEnd2: %d\n", numOfZerosAtEnd2);
|
||||
//printf("numberOfNewClauses: %d\n", numberOfNewClauses);
|
||||
//printf("numxsAtEnd[numOfClauses+numberOfNewClauses]: %d\n", numxsAtEnd[numOfClauses+numberOfNewClauses]);
|
||||
|
||||
numberOfNewClauses++;
|
||||
newClausesPerClause++;
|
||||
|
||||
if (newClausesPerClause==howManyClausesAddMaximumPerClauses) break;
|
||||
if (numberOfNewClauses==addLongTailClauses) break;
|
||||
}
|
||||
if (newClausesPerClause==howManyClausesAddMaximumPerClauses) break;
|
||||
if (numberOfNewClauses==addLongTailClauses) break;
|
||||
if (numberOfNewClauses==addLongTailClauses) break;
|
||||
//+1 m?lys?g v?ge
|
||||
|
||||
|
||||
|
||||
}
|
||||
numOfZerosAtEnd++;
|
||||
if (numOfZerosAtEnd == numOfVariables) break;
|
||||
}
|
||||
|
||||
int i;
|
||||
for(i=numOfClauses; i<numOfClauses+numberOfNewClauses; i++)
|
||||
{
|
||||
int clauseIndex;
|
||||
for(clauseIndex=i-1; clauseIndex>=0; clauseIndex--)
|
||||
{
|
||||
if (numxsAtEnd[clauseIndex]<numxsAtEnd[clauseIndex+1])
|
||||
{
|
||||
unsigned int temp = numxsAtEnd[clauseIndex];
|
||||
numxsAtEnd[clauseIndex] = numxsAtEnd[clauseIndex+1];
|
||||
numxsAtEnd[clauseIndex+1] = temp;
|
||||
|
||||
int indexA = clauseIndex * lentghOfBitArray * 2;
|
||||
int indexB = (clauseIndex+1) * lentghOfBitArray * 2;
|
||||
int relIndex;
|
||||
for(relIndex=0; relIndex<lentghOfBitArray*2; relIndex++)
|
||||
{
|
||||
temp = clauses[indexA+relIndex];
|
||||
clauses[indexA+relIndex] = clauses[indexB+relIndex];
|
||||
clauses[indexB+relIndex] = temp;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
numOfClauses+=numberOfNewClauses;
|
||||
createIndexClausesByNumxsAtEnd();
|
||||
}
|
||||
|
||||
|
||||
void CCCSolver(FILE* iofile)
|
||||
{
|
||||
counter = malloc(sizeof(int)*lentghOfBitArray);
|
||||
int counterArrayIndex;
|
||||
for(counterArrayIndex=0; counterArrayIndex<lentghOfBitArray; counterArrayIndex++)
|
||||
{
|
||||
counter[counterArrayIndex] = 0;
|
||||
}
|
||||
|
||||
int changed = 1;
|
||||
while(changed)
|
||||
{
|
||||
changed = 0;
|
||||
int maxNumx = -1;
|
||||
int clauseIndex = 0;
|
||||
if (doIndexing)
|
||||
{
|
||||
int numOfZerosAtEnd = countZeroBitsOnTheRightInBitArrayOldOne(counter);
|
||||
|
||||
clauseIndex=indexClausesByNumxsAtEnd[numOfZerosAtEnd];
|
||||
fprintf(iofile,"z: %d %d\n", numOfZerosAtEnd, clauseIndex);
|
||||
}
|
||||
while(clauseIndex<numOfClauses)
|
||||
{
|
||||
int subsumed = 1;
|
||||
int intIndex = 0;
|
||||
int index = clauseIndex * lentghOfBitArray * 2;
|
||||
for(intIndex=0; intIndex<lentghOfBitArray; intIndex++)
|
||||
{
|
||||
if ((counter[intIndex] & clauses[index]) != (clauses[index+1] & clauses[index]))
|
||||
{
|
||||
subsumed = 0;
|
||||
break;
|
||||
}
|
||||
index+=2;
|
||||
}
|
||||
//printf("Subsumed: %d\n", subsumed);
|
||||
if (subsumed)
|
||||
{
|
||||
if (numxsAtEnd[clauseIndex]>maxNumx) maxNumx = numxsAtEnd[clauseIndex];
|
||||
// printf("s %u %u\n", clauseIndex, numxsAtEnd[clauseIndex]);
|
||||
if (doSorting) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
clauseIndex++;
|
||||
}
|
||||
if(maxNumx >= 0)
|
||||
{
|
||||
fprintf(iofile, "j: %d\n",maxNumx);
|
||||
int intIndex = maxNumx / (sizeof(int)*8);
|
||||
int bitIndex = maxNumx % (sizeof(int)*8);
|
||||
|
||||
unsigned int oldCounter = counter[intIndex];
|
||||
//printf("Counter old = %d : %d\n", intIndex, oldCounter);
|
||||
|
||||
counter[intIndex] += 1 << bitIndex;
|
||||
//printf("Counter new = %d : %u\n\n", intIndex, counter.array[intIndex]);
|
||||
|
||||
// add carry
|
||||
while(oldCounter > counter[intIndex]) {
|
||||
//printf("add carry\n");
|
||||
//printf("num of clauses:%d\n",numOfClauses);
|
||||
//printf("i = %d\n", i);
|
||||
//printf("Counter old = %d : %u\n", intIndex, oldCounter);
|
||||
//printf("bitIndex = %d ; 1 << bitIndex = %d\n", bitIndex, 1 << bitIndex);
|
||||
//printf("Counter new = %d : %u\n", intIndex, counter.array[intIndex]);
|
||||
intIndex++;
|
||||
oldCounter = counter[intIndex];
|
||||
//printf("Counter old = %d : %u\n", intIndex, oldCounter);
|
||||
counter[intIndex]++; // carry added here
|
||||
//printf("Counter new = %d : %u\n\n", intIndex, counter[intIndex]);
|
||||
}
|
||||
|
||||
if (counter[lentghOfBitArray-1] >= MAXVALUE) return;
|
||||
changed = 1;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
int i;
|
||||
for (i=0; i<lentghOfBitArray; i++) {
|
||||
counter[i] = ~counter[i];
|
||||
}
|
||||
counter[lentghOfBitArray-1] &= (MAXVALUE-1);
|
||||
}
|
||||
|
||||
void printHelp() {
|
||||
printf("Count Clear Clause SAT Solver v2.0\n");
|
||||
printf("\tA SAT solver based on the CCC algorithm.\n");
|
||||
printf("\tDeveloped by Gabor Kusper, 2015.\n");
|
||||
printf("USAGE:\n");
|
||||
printf("\tcsfloc [-help] -input=<inputfile.cnf>\n");
|
||||
printf("\t\t<inputfile.cnf> is a file name, it is the SAT problem to be solved, "
|
||||
"it should be in DIMACS format.\n");
|
||||
printf("\t\tOUTPUT is written to standard output.\n");
|
||||
printf("OPTIONS:\n");
|
||||
printf("\t-help\tDisplays this help message.\n");
|
||||
printf("\t-dosort\tTurns on clause sorting.\n");
|
||||
printf("\t-donotsort\tTurns off clause sorting.\n");
|
||||
printf("\t-doindexing\tTurns on clause indexing, and also forces clause sorting to be switched on.\n");
|
||||
printf("\t-donotindexing\tTurns off clause indexing.\n");
|
||||
printf("\t-addLongTailClauses=N\tAllows to add long tail clauses, ");
|
||||
printf("max N clauses are allowed, if N is 0 then it switches off long tail clause adding. ");
|
||||
printf("If N is greater than 0 then it forces clause ordering and indexing.\n");
|
||||
printf("\t-addLongTailClausesPerClause=M\tAllows to add maximum M long tail clauses per each input clause, ");
|
||||
printf("if M is 0 then it switches off long tail clause adding, i.e., it sets N to be 0. ");
|
||||
printf("It has any effect if and only if long tail clause adding is switched on, ");
|
||||
printf("i.e., N and M are not 0.\n");
|
||||
printf("THE DEFAULT SETTINGS ARE:\n");
|
||||
printf("Clause indexing is turned on, clause sorting is turned on, ");
|
||||
printf("1000 long tail clauses are allowed, 100 per input clauses, i.e., N = 1000, and M = 100.\n");
|
||||
printf("EXAMPLE 1:\n");
|
||||
printf("\tcsfloc -input=hole6.cnf\n");
|
||||
printf("\tReads and solves the SAT problem represented by hole6.cnf. The solution is written to the standard output.\n");
|
||||
printf("EXAMPLE 2:\n");
|
||||
printf("\tcsfloc -input=uf50-01.cnf -dosort -doindex -addLongTailClauses=1000 -addLongTailClausesPerClause=100\n");
|
||||
printf("\tReads and solves the SAT problem represented by uf50-01.cnf. ");
|
||||
printf("Clause sorting is switched on. ");
|
||||
printf("Clause indexing is switched on. ");
|
||||
printf("Adding long tail clauses is switch on. Maximum 1000 new clauses are created, maximum 100 per input clauses. ");
|
||||
printf("The solution is written to the standard output.\n");
|
||||
}
|
||||
|
||||
// read the "value" of a command line argument, where the format of the arguments is <name>=<value>
|
||||
char* getCommandLineArgValue(char* arg) {
|
||||
char* s = strchr(arg, '=');
|
||||
|
||||
if (!s) return NULL;
|
||||
|
||||
// close the "name" of the argument, as a string
|
||||
*s = '\0';
|
||||
|
||||
// return the "value" of the argument
|
||||
return s + 1;
|
||||
}
|
||||
|
||||
// process all the command line argument
|
||||
int processCommandLineArgs(int argc, char** argv) {
|
||||
int i=0;
|
||||
int numberOfSetMandatoryOptions=1;
|
||||
return numberOfSetMandatoryOptions == 1;
|
||||
}
|
||||
|
||||
void runcsfloc(char* path, FILE* iofile) {
|
||||
|
||||
long startTime = clock();
|
||||
//printf("phase 0\n");
|
||||
readDIMACScsfloc(path);
|
||||
//printf("phase 1\n");
|
||||
// printClauses();
|
||||
if (doSorting) {
|
||||
sortClausesByNumxsAtEnd();
|
||||
}
|
||||
//printf("phase 2\n");
|
||||
// printClauses();
|
||||
if (doIndexing) createIndexClausesByNumxsAtEnd();
|
||||
//printf("phase 3\n");
|
||||
if (addLongTailClauses>0) {
|
||||
addClausesByResolution();
|
||||
printf("Please don't do this\n");
|
||||
}
|
||||
|
||||
|
||||
CCCSolver(iofile);
|
||||
|
||||
long endTime = clock();
|
||||
|
||||
/*
|
||||
if (counter[lentghOfBitArray-1] >= MAXVALUE) printf("0\n");
|
||||
else
|
||||
{
|
||||
printf("1\n");
|
||||
}
|
||||
*/
|
||||
|
||||
|
||||
|
||||
free(numxsAtEnd);
|
||||
free(counter);
|
||||
free(clauses);
|
||||
free(indexClausesByNumxsAtEnd);
|
||||
|
||||
|
||||
}
|
||||
5
csflocref.h
Normal file
5
csflocref.h
Normal file
@@ -0,0 +1,5 @@
|
||||
#pragma once
|
||||
#include "types.h"
|
||||
#include <stdio.h>
|
||||
|
||||
void runcsfloc(char* path, FILE* iofile);
|
||||
1
githubtoken
Normal file
1
githubtoken
Normal file
@@ -0,0 +1 @@
|
||||
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
|
||||
481
gpusolver.c
481
gpusolver.c
@@ -1,14 +1,349 @@
|
||||
#include "gpusolver.h"
|
||||
#include <CL/cl.h>
|
||||
#include "time.h"
|
||||
#include "gmp.h"
|
||||
|
||||
#define GLOBAL_SIZE (256)
|
||||
#define LOCAL_SIZE (GLOBAL_SIZE)
|
||||
|
||||
#define LOCAL_SIZE (64)
|
||||
#define GLOBAL_SIZE (2048)
|
||||
|
||||
#define CHECKASGN (true)
|
||||
|
||||
#define DEBUG
|
||||
|
||||
|
||||
|
||||
|
||||
gpusolver* initSolver() {
|
||||
gpusolver* o = calloc(1, sizeof(gpusolver));
|
||||
if (o == NULL) return NULL;
|
||||
|
||||
o->platformid = NULL;
|
||||
o->numplatforms = 0;
|
||||
|
||||
o->deviceid = NULL;
|
||||
o->numdevices = 0;
|
||||
|
||||
FILE* fp = fopen("../psat.cl", "r");
|
||||
if (!fp) {
|
||||
fprintf(stderr, "Failed to load kernel\n");
|
||||
// TODO: Cleanup
|
||||
return NULL;
|
||||
}
|
||||
o->source_str = malloc(0x100000);
|
||||
o->source_size = fread(o->source_str, 1, 0x100000, fp);
|
||||
o->source_str = realloc(o->source_str, o->source_size + 1);
|
||||
if (o->source_str == NULL) {
|
||||
printf("Failed to reallocate source\n");
|
||||
return NULL;
|
||||
}
|
||||
fclose(fp);
|
||||
|
||||
cl_int res = clGetPlatformIDs(1, &(o->platformid), &(o->numplatforms));
|
||||
if (res != CL_SUCCESS) {
|
||||
printf("Failed to retrieve OpenCL platform IDs\n");
|
||||
// TODO: Cleanup
|
||||
return NULL;
|
||||
}
|
||||
|
||||
res = clGetDeviceIDs(o->platformid, CL_DEVICE_TYPE_GPU, 1, &(o->deviceid), &(o->numdevices));
|
||||
if (res != CL_SUCCESS) {
|
||||
printf("Failed to retrieve OpenCL device IDs\n");
|
||||
// TODO: Cleanup
|
||||
return NULL;
|
||||
}
|
||||
|
||||
o->ctx = clCreateContext(NULL, 1, &(o->deviceid), NULL, NULL, &res);
|
||||
if (res != CL_SUCCESS) {
|
||||
printf("Failed to create OpenCL context\n");
|
||||
// TODO: Cleanup
|
||||
return NULL;
|
||||
}
|
||||
cl_queue_properties properties[] = { CL_QUEUE_PROPERTIES, CL_QUEUE_PROFILING_ENABLE, 0 };
|
||||
o->commqueue = clCreateCommandQueueWithProperties(o->ctx, o->deviceid, properties, &res);
|
||||
if (res != CL_SUCCESS) {
|
||||
printf("Failed to create OpenCL command queue\n");
|
||||
// TODO: Cleanup
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
|
||||
res = clGetDeviceInfo(o->deviceid, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(cl_ulong), &(o->gpuMemoryMax), NULL);
|
||||
if (res != CL_SUCCESS) {
|
||||
printf("Failed to query total GPU memory\n");
|
||||
// TODO: CLeanup
|
||||
return NULL;
|
||||
}
|
||||
|
||||
res = clGetDeviceInfo(o->deviceid, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(cl_ulong), &(o->gpuLocalMax), NULL);
|
||||
if (res != CL_SUCCESS) {
|
||||
printf("Failed to query total GPU memory\n");
|
||||
// TODO: CLeanup
|
||||
return NULL;
|
||||
}
|
||||
|
||||
res = clGetDeviceInfo(o->deviceid, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(cl_ulong), &(o->gpuAllocMax), NULL);
|
||||
if (res != CL_SUCCESS) {
|
||||
printf("Failed to query total GPU memory\n");
|
||||
// TODO: CLeanup
|
||||
return NULL;
|
||||
}
|
||||
|
||||
res = clGetDeviceInfo(o->deviceid, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(cl_ulong), &(o->gpuCUs), NULL);
|
||||
if (res != CL_SUCCESS) {
|
||||
printf("Failed to query total GPU memory\n");
|
||||
// TODO: Cleanup
|
||||
return NULL;
|
||||
}
|
||||
|
||||
o->program = clCreateProgramWithSource(o->ctx, 1, (const char**) &(o->source_str), (const size_t*) &(o->source_size), &res);
|
||||
if (res != CL_SUCCESS) {
|
||||
printf("Failed to create OpenCL program\n");
|
||||
// TODO: Cleanup
|
||||
exit(1);
|
||||
}
|
||||
|
||||
res = clBuildProgram(o->program, 1, &(o->deviceid), NULL, NULL, NULL);
|
||||
if (res != CL_SUCCESS) {
|
||||
printf("Build failed\n");
|
||||
// TODO: Cleanup
|
||||
exit(1);
|
||||
}
|
||||
|
||||
|
||||
size_t loglen = 0;
|
||||
res = clGetProgramBuildInfo(o->program, o->deviceid, CL_PROGRAM_BUILD_LOG, NULL, NULL, &loglen);
|
||||
if (res != CL_SUCCESS) {
|
||||
printf("Failed to retrieve build logs\n");
|
||||
exit(1);
|
||||
}
|
||||
char* logbuf = malloc(sizeof(char) * loglen);
|
||||
res = clGetProgramBuildInfo(o->program, o->deviceid, CL_PROGRAM_BUILD_LOG, sizeof(char) * loglen, logbuf, &loglen);
|
||||
if (res != CL_SUCCESS) {
|
||||
printf("Failed to retrieve build logs\n");
|
||||
exit(1);
|
||||
}
|
||||
printf("%*.s\n", (int) loglen, logbuf);
|
||||
free(logbuf);
|
||||
|
||||
o->kernel = clCreateKernel(o->program, "vectorSAT", &res);
|
||||
if (res != CL_SUCCESS) {
|
||||
printf("Failed to create kernel\n");
|
||||
printf("%d\n", res);
|
||||
// TODO: Cleanup
|
||||
exit(1);
|
||||
}
|
||||
|
||||
printf("Initialized solver:\n");
|
||||
printf("\tCompute Units: %lu\n", o->gpuCUs);
|
||||
printf("\tMax Global Memory: %lu\n", o->gpuMemoryMax);
|
||||
printf("\tMax Local Memory: %lu\n", o->gpuLocalMax);
|
||||
printf("\tMax Alloc Memory: %lu\n", o->gpuAllocMax);
|
||||
|
||||
return o;
|
||||
}
|
||||
|
||||
i32 gpusolve2(gpusolver* gs, cnf* c) {
|
||||
u32 wcnt = 1 + (c->cnts[0] >> 5U);
|
||||
|
||||
u32* solution = calloc((wcnt + 1), sizeof(u32));
|
||||
if (solution == NULL) {
|
||||
printf("Failed to allocate solution buffer\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
gs->gpuCUs = 1024;
|
||||
mpz_t gmpmax;
|
||||
mpz_init(gmpmax);
|
||||
mpz_ui_pow_ui(gmpmax, 2, c->cnts[0]);
|
||||
mpz_div_ui(gmpmax, gmpmax, gs->gpuCUs);
|
||||
mpz_export(solution + 1, NULL, -1, sizeof(u32), 0, 0, gmpmax);
|
||||
// mpz_out_str(stdout, 10, gmpmax);
|
||||
// printf("\n\n");
|
||||
mpz_clear(gmpmax);
|
||||
|
||||
solution[0] = 0;
|
||||
|
||||
cl_int res = 2;
|
||||
|
||||
cl_mem gpuheader = clCreateBuffer(gs->ctx, CL_MEM_READ_ONLY, 2 * sizeof(cl_uint), NULL, &res);
|
||||
if (res != CL_SUCCESS) {
|
||||
printf("Failed to create CNF header buffer\n");
|
||||
exit(1);
|
||||
}
|
||||
cl_mem gpulvars = clCreateBuffer(gs->ctx, CL_MEM_READ_ONLY, 3 * c->cnts[1] * sizeof(cl_uint), NULL, &res);
|
||||
if (res != CL_SUCCESS) {
|
||||
printf("Failed to create CNF lvar buffer\n");
|
||||
exit(1);
|
||||
}
|
||||
cl_mem gpuvariables = clCreateBuffer(gs->ctx, CL_MEM_READ_ONLY, c->cnts[2] * sizeof(cl_uint), NULL, &res);
|
||||
if (res != CL_SUCCESS) {
|
||||
printf("Failed to create CNF variable buffer\n");
|
||||
exit(1);
|
||||
}
|
||||
cl_mem gpuparities = clCreateBuffer(gs->ctx, CL_MEM_READ_ONLY, c->cnts[2] * sizeof(cl_uchar), NULL, &res);
|
||||
if (res != CL_SUCCESS) {
|
||||
printf("Failed to create CNF parity buffer\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
cl_mem gpuoutput = clCreateBuffer(gs->ctx, CL_MEM_READ_WRITE, (wcnt + 1) * sizeof(cl_uint), NULL, &res);
|
||||
if (res != CL_SUCCESS) {
|
||||
printf("Failed to create output buffer\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
cl_mem gpuscratchpad = clCreateBuffer(gs->ctx, CL_MEM_READ_WRITE, 2 * wcnt * gs->gpuCUs * sizeof(cl_uint), NULL, &res);
|
||||
if (res != CL_SUCCESS) {
|
||||
printf("Failed to create output buffer\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
// Load buffers to GPU
|
||||
res = clEnqueueWriteBuffer(gs->commqueue, gpuheader, CL_TRUE, 0, 2 * sizeof(cl_uint), c->cnts, 0, NULL, NULL);
|
||||
if (res != CL_SUCCESS) {
|
||||
printf("Failed to queue CNF header write\n");
|
||||
exit(1);
|
||||
}
|
||||
res = clEnqueueWriteBuffer(gs->commqueue, gpulvars, CL_TRUE, 0, 3 * c->cnts[1] * sizeof(cl_uint), c->clausedat, 0, NULL, NULL);
|
||||
if (res != CL_SUCCESS) {
|
||||
printf("Failed to queue CNF lvar write\n");
|
||||
exit(1);
|
||||
}
|
||||
res = clEnqueueWriteBuffer(gs->commqueue, gpuvariables, CL_TRUE, 0, c->cnts[2] * sizeof(cl_uint), c->variables, 0, NULL, NULL);
|
||||
if (res != CL_SUCCESS) {
|
||||
printf("Failed to queue CNF variable write\n");
|
||||
exit(1);
|
||||
}
|
||||
res = clEnqueueWriteBuffer(gs->commqueue, gpuparities, CL_TRUE, 0, c->cnts[2] * sizeof(cl_uchar), c->parities, 0, NULL, NULL);
|
||||
if (res != CL_SUCCESS) {
|
||||
printf("Failed to queue CNF parity write\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
res = clEnqueueWriteBuffer(gs->commqueue, gpuoutput, CL_TRUE, 0, (wcnt + 1) * sizeof(cl_uint), solution, 0, NULL, NULL);
|
||||
if (res != CL_SUCCESS) {
|
||||
printf("Failed to queue CNF parity write\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
res = clSetKernelArg(gs->kernel, 0, sizeof(cl_mem), (void*) &gpuheader);
|
||||
res = clSetKernelArg(gs->kernel, 1, sizeof(cl_mem), (void*) &gpulvars);
|
||||
res = clSetKernelArg(gs->kernel, 2, sizeof(cl_mem), (void*) &gpuvariables);
|
||||
res = clSetKernelArg(gs->kernel, 3, sizeof(cl_mem), (void*) &gpuparities);
|
||||
|
||||
res = clSetKernelArg(gs->kernel, 4, sizeof(cl_mem), (void*) &gpuoutput);
|
||||
|
||||
res = clSetKernelArg(gs->kernel, 5, sizeof(cl_mem), (void*) &gpuscratchpad);
|
||||
|
||||
size_t deploySize[2] = { gs->gpuCUs, 64 };
|
||||
res = clEnqueueNDRangeKernel(gs->commqueue, gs->kernel, 1, NULL, &(gs->gpuCUs), &(gs->gpuCUs), 0, NULL, NULL);
|
||||
if (res != CL_SUCCESS) {
|
||||
printf("Failed to queue kernel for execution\n");
|
||||
exit(res);
|
||||
}
|
||||
|
||||
res = clEnqueueReadBuffer(gs->commqueue, gpuoutput, CL_TRUE, 0, (wcnt + 1) * sizeof(cl_uint), solution, 0, NULL, NULL);
|
||||
if (res != CL_SUCCESS) {
|
||||
printf("Failed to read kernel output\n");
|
||||
exit(1);
|
||||
}
|
||||
// u64 endtime = utime();
|
||||
|
||||
if (solution[0] == 0) {
|
||||
printf("UNSAT\n");
|
||||
} else if (solution[0] == 1) {
|
||||
printf("SAT: ");
|
||||
for (u32 k = 0; k < c->cnts[0]; ++k) {
|
||||
u32 vind = (c->cnts[0] - 1) - k;
|
||||
u32 iind = vind >> 5U;
|
||||
u32 bind = vind & 0b11111U;
|
||||
u8 par = (solution[iind + 1] >> bind) & 1U;
|
||||
printf("%u", par);
|
||||
}
|
||||
if (CHECKASGN) {
|
||||
u8 checkres = 0;
|
||||
for (u32 i = 0; i < c->cnts[1]; ++i) {
|
||||
checkres = 0;
|
||||
for (u32 j = 0; j < c->clausedat[3 * i + 1]; ++j) {
|
||||
u32 v = c->variables[c->clausedat[3 * i] + j];
|
||||
u32 vv = c->cnts[0] - 1;
|
||||
u32 g = (vv - v) >> 5U;
|
||||
u32 h = (vv - v) & 0b11111U;
|
||||
u8 paract = (solution[g + 1] >> h) & 1U;
|
||||
if (c->parities[c->clausedat[3 * i] + j] == paract) {
|
||||
checkres = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!checkres) break;
|
||||
}
|
||||
if (checkres) {
|
||||
printf(" \xE2\x9C\x93\n");
|
||||
} else {
|
||||
printf(" -\n");
|
||||
}
|
||||
}
|
||||
} else {
|
||||
printf("What the fuck???\n");
|
||||
solution[0] = 3;
|
||||
}
|
||||
|
||||
res = clReleaseMemObject(gpuheader);
|
||||
res = clReleaseMemObject(gpulvars);
|
||||
res = clReleaseMemObject(gpuvariables);
|
||||
res = clReleaseMemObject(gpuparities);
|
||||
res = clReleaseMemObject(gpuoutput);
|
||||
res = clReleaseMemObject(gpuscratchpad);
|
||||
|
||||
i32 retval = (i32) solution[0];
|
||||
free(solution);
|
||||
return retval;
|
||||
}
|
||||
|
||||
void freeSolver(gpusolver* gs) {
|
||||
i32 res = 0;
|
||||
res = clFlush(gs->commqueue);
|
||||
if (res != CL_SUCCESS) {
|
||||
printf("Failed to release solver\n");
|
||||
return;
|
||||
}
|
||||
res = clFinish(gs->commqueue);
|
||||
if (res != CL_SUCCESS) {
|
||||
printf("Failed to release solver\n");
|
||||
return;
|
||||
}
|
||||
res = clReleaseKernel(gs->kernel);
|
||||
if (res != CL_SUCCESS) {
|
||||
printf("Failed to release solver\n");
|
||||
return;
|
||||
}
|
||||
res = clReleaseProgram(gs->program);
|
||||
if (res != CL_SUCCESS) {
|
||||
printf("Failed to release solver\n");
|
||||
return;
|
||||
}
|
||||
|
||||
res = clReleaseCommandQueue(gs->commqueue);
|
||||
if (res != CL_SUCCESS) {
|
||||
printf("Failed to release solver\n");
|
||||
return;
|
||||
}
|
||||
res = clReleaseContext(gs->ctx);
|
||||
if (res != CL_SUCCESS) {
|
||||
printf("Failed to release solver\n");
|
||||
return;
|
||||
}
|
||||
|
||||
res = clReleaseDevice(gs->deviceid);
|
||||
if (res != CL_SUCCESS) {
|
||||
printf("Failed to release solver\n");
|
||||
return;
|
||||
}
|
||||
free(gs->source_str);
|
||||
free(gs);
|
||||
}
|
||||
|
||||
i32 gpusolve(cnf* c) {
|
||||
cl_platform_id platformid = NULL;
|
||||
cl_device_id deviceid = NULL;
|
||||
@@ -28,7 +363,7 @@ i32 gpusolve(cnf* c) {
|
||||
source_size = fread( source_str, 1, 0x100000, fp);
|
||||
fclose( fp );
|
||||
|
||||
u32 wordcnt = 1 + ((c->varcnt) >> 5U);
|
||||
u32 wordcnt = 1 + ((c->cnts[0]) >> 5U);
|
||||
|
||||
u32* solution = calloc((wordcnt + 1), sizeof(u32));
|
||||
if (solution == NULL) {
|
||||
@@ -36,6 +371,14 @@ i32 gpusolve(cnf* c) {
|
||||
exit(1);
|
||||
}
|
||||
|
||||
mpz_t gmpmax;
|
||||
mpz_init(gmpmax);
|
||||
mpz_ui_pow_ui(gmpmax, 2, c->cnts[0]);
|
||||
mpz_div_ui(gmpmax, gmpmax, GLOBAL_SIZE);
|
||||
mpz_export(solution + 1, NULL, -1, sizeof(u32), 0, 0, gmpmax);
|
||||
mpz_clear(gmpmax);
|
||||
|
||||
// printf("%lu\n", wordcnt);
|
||||
cl_int res = clGetPlatformIDs(1, &platformid, &numplatforms);
|
||||
if (res != CL_SUCCESS) {
|
||||
printf("Failed to retrieve OpenCL platform IDs\n");
|
||||
@@ -49,6 +392,36 @@ i32 gpusolve(cnf* c) {
|
||||
}
|
||||
// printf("Found %u devices\n", numdevices);
|
||||
|
||||
u64 memoryMax = 0;
|
||||
res = clGetDeviceInfo(deviceid, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(cl_ulong), &memoryMax, NULL);
|
||||
if (res != CL_SUCCESS) {
|
||||
printf("Failed to query GPU memory\n");
|
||||
exit(1);
|
||||
}
|
||||
u64 localMax = 0;
|
||||
res = clGetDeviceInfo(deviceid, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(cl_ulong), &localMax, NULL);
|
||||
if (res != CL_SUCCESS) {
|
||||
printf("Failed to query GPU memory\n");
|
||||
exit(1);
|
||||
}
|
||||
u64 allocMax = 0;
|
||||
res = clGetDeviceInfo(deviceid, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(cl_ulong), &allocMax, NULL);
|
||||
if (res != CL_SUCCESS) {
|
||||
printf("Failed to query GPU memory\n");
|
||||
exit(1);
|
||||
}
|
||||
printf("GPU mem: %lu %lu %lu\n", memoryMax, localMax, allocMax);
|
||||
|
||||
size_t computeUnits = 0;
|
||||
res = clGetDeviceInfo(deviceid, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(size_t), &computeUnits, NULL);
|
||||
if (res != CL_SUCCESS) {
|
||||
printf("Failed to query GPU memory\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
printf("Compute Units: %lu\n", computeUnits);
|
||||
|
||||
|
||||
cl_context context = clCreateContext(NULL, 1, &deviceid, NULL, NULL, &res);
|
||||
if (res != CL_SUCCESS) {
|
||||
printf("Failed to create OpenCL context\n");
|
||||
@@ -74,38 +447,27 @@ i32 gpusolve(cnf* c) {
|
||||
*/
|
||||
|
||||
// TODO: Look into DMA, maybe? Could do clause learning CPU-side and just update the GPU buffer
|
||||
cl_mem gpuheader = clCreateBuffer(context, CL_MEM_READ_ONLY, 3 * sizeof(cl_uint), NULL, &res);
|
||||
cl_mem gpuheader = clCreateBuffer(context, CL_MEM_READ_ONLY, 2 * sizeof(cl_uint), NULL, &res);
|
||||
if (res != CL_SUCCESS) {
|
||||
printf("Failed to create CNF header buffer\n");
|
||||
exit(1);
|
||||
}
|
||||
cl_mem gpulvars = clCreateBuffer(context, CL_MEM_READ_ONLY, c->clausecnt * sizeof(cl_uint), NULL, &res);
|
||||
cl_mem gpulvars = clCreateBuffer(context, CL_MEM_READ_ONLY, 3 * c->cnts[1] * sizeof(cl_uint), NULL, &res);
|
||||
if (res != CL_SUCCESS) {
|
||||
printf("Failed to create CNF lvar buffer\n");
|
||||
exit(1);
|
||||
}
|
||||
cl_mem gpuvariables = clCreateBuffer(context, CL_MEM_READ_ONLY, c->litcnt * sizeof(cl_uint), NULL, &res);
|
||||
cl_mem gpuvariables = clCreateBuffer(context, CL_MEM_READ_ONLY, c->cnts[2] * sizeof(cl_uint), NULL, &res);
|
||||
if (res != CL_SUCCESS) {
|
||||
printf("Failed to create CNF variable buffer\n");
|
||||
exit(1);
|
||||
}
|
||||
cl_mem gpuclauses = clCreateBuffer(context, CL_MEM_READ_ONLY, c->litcnt * sizeof(cl_uint), NULL, &res);
|
||||
if (res != CL_SUCCESS) {
|
||||
printf("Failed to create CNF clause buffer\n");
|
||||
exit(1);
|
||||
}
|
||||
cl_mem gpuparities = clCreateBuffer(context, CL_MEM_READ_ONLY, c->litcnt * sizeof(cl_uchar), NULL, &res);
|
||||
cl_mem gpuparities = clCreateBuffer(context, CL_MEM_READ_ONLY, c->cnts[2] * sizeof(cl_uchar), NULL, &res);
|
||||
if (res != CL_SUCCESS) {
|
||||
printf("Failed to create CNF parity buffer\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
// Allocate scratchpad memory
|
||||
cl_mem gpuscratchpad = clCreateBuffer(context, CL_MEM_READ_WRITE, c->clausecnt * sizeof(cl_uchar), NULL, &res);
|
||||
if (res != CL_SUCCESS) {
|
||||
printf("Failed to create CNF subsumption scratchpad buffer\n");
|
||||
exit(1);
|
||||
}
|
||||
/*
|
||||
cl_mem gpumaxvals = clCreateBuffer(context, CL_MEM_READ_WRITE, GLOBAL_SIZE * sizeof(cl_uint), NULL, &res);
|
||||
if (res != CL_SUCCESS) {
|
||||
@@ -120,30 +482,35 @@ i32 gpusolve(cnf* c) {
|
||||
exit(1);
|
||||
}
|
||||
|
||||
u32 cnfheader[3] = { c->litcnt, c->varcnt, c->clausecnt };
|
||||
cl_mem gpuscratchpad = clCreateBuffer(context, CL_MEM_READ_WRITE, 2 * wordcnt * GLOBAL_SIZE * sizeof(cl_uint), NULL, &res);
|
||||
if (res != CL_SUCCESS) {
|
||||
printf("Failed to create output buffer\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
// Load buffers to GPU
|
||||
res = clEnqueueWriteBuffer(commqueue, gpuheader, CL_TRUE, 0, 3 * sizeof(cl_uint), cnfheader, 0, NULL, NULL);
|
||||
res = clEnqueueWriteBuffer(commqueue, gpuheader, CL_TRUE, 0, 2 * sizeof(cl_uint), c->cnts, 0, NULL, NULL);
|
||||
if (res != CL_SUCCESS) {
|
||||
printf("Failed to queue CNF header write\n");
|
||||
exit(1);
|
||||
}
|
||||
res = clEnqueueWriteBuffer(commqueue, gpulvars, CL_TRUE, 0, c->clausecnt * sizeof(cl_uint), c->lastvars, 0, NULL, NULL);
|
||||
res = clEnqueueWriteBuffer(commqueue, gpulvars, CL_TRUE, 0, 3 * c->cnts[1] * sizeof(cl_uint), c->clausedat, 0, NULL, NULL);
|
||||
if (res != CL_SUCCESS) {
|
||||
printf("Failed to queue CNF lvar write\n");
|
||||
exit(1);
|
||||
}
|
||||
res = clEnqueueWriteBuffer(commqueue, gpuvariables, CL_TRUE, 0, c->litcnt * sizeof(cl_uint), c->variables, 0, NULL, NULL);
|
||||
res = clEnqueueWriteBuffer(commqueue, gpuvariables, CL_TRUE, 0, c->cnts[2] * sizeof(cl_uint), c->variables, 0, NULL, NULL);
|
||||
if (res != CL_SUCCESS) {
|
||||
printf("Failed to queue CNF variable write\n");
|
||||
exit(1);
|
||||
}
|
||||
res = clEnqueueWriteBuffer(commqueue, gpuclauses, CL_TRUE, 0, c->litcnt * sizeof(cl_uint), c->clauses, 0, NULL, NULL);
|
||||
res = clEnqueueWriteBuffer(commqueue, gpuparities, CL_TRUE, 0, c->cnts[2] * sizeof(cl_uchar), c->parities, 0, NULL, NULL);
|
||||
if (res != CL_SUCCESS) {
|
||||
printf("Failed to queue CNF clause write\n");
|
||||
printf("Failed to queue CNF parity write\n");
|
||||
exit(1);
|
||||
}
|
||||
res = clEnqueueWriteBuffer(commqueue, gpuparities, CL_TRUE, 0, c->litcnt * sizeof(cl_uchar), c->pars, 0, NULL, NULL);
|
||||
|
||||
res = clEnqueueWriteBuffer(commqueue, gpuoutput, CL_TRUE, 0, (wordcnt + 1) * sizeof(cl_uint), solution, 0, NULL, NULL);
|
||||
if (res != CL_SUCCESS) {
|
||||
printf("Failed to queue CNF parity write\n");
|
||||
exit(1);
|
||||
@@ -176,16 +543,19 @@ i32 gpusolve(cnf* c) {
|
||||
exit(1);
|
||||
}
|
||||
|
||||
size_t maxworkgrpu = 0;
|
||||
res = clGetKernelWorkGroupInfo(kernel, deviceid, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &maxworkgrpu, NULL);
|
||||
|
||||
printf("Max work group size: %lu\n", maxworkgrpu);
|
||||
|
||||
res = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void*) &gpuheader);
|
||||
res = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void*) &gpulvars);
|
||||
res = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void*) &gpuvariables);
|
||||
res = clSetKernelArg(kernel, 3, sizeof(cl_mem), (void*) &gpuclauses);
|
||||
res = clSetKernelArg(kernel, 4, sizeof(cl_mem), (void*) &gpuparities);
|
||||
res = clSetKernelArg(kernel, 3, sizeof(cl_mem), (void*) &gpuparities);
|
||||
|
||||
res = clSetKernelArg(kernel, 5, sizeof(cl_mem), (void*) &gpuoutput);
|
||||
res = clSetKernelArg(kernel, 4, sizeof(cl_mem), (void*) &gpuoutput);
|
||||
|
||||
res = clSetKernelArg(kernel, 6, sizeof(cl_mem), (void*) &gpuscratchpad);
|
||||
res = clSetKernelArg(kernel, 7, LOCAL_SIZE * sizeof(cl_uint), NULL);
|
||||
res = clSetKernelArg(kernel, 5, sizeof(cl_mem), (void*) &gpuscratchpad);
|
||||
|
||||
// u64 starttime = utime();
|
||||
size_t itemsize[2] = {GLOBAL_SIZE, LOCAL_SIZE };
|
||||
@@ -202,34 +572,39 @@ i32 gpusolve(cnf* c) {
|
||||
}
|
||||
// u64 endtime = utime();
|
||||
|
||||
if (solution[0] == 1) {
|
||||
if (solution[0] == 0) {
|
||||
printf("UNSAT\n");
|
||||
} else if (solution[0] == 0) {
|
||||
printf("SAT\n");
|
||||
for (u32 k = 0; k < c->varcnt; ++k) {
|
||||
u32 vind = (c->varcnt - 1) - k;
|
||||
} else if (solution[0] == 1) {
|
||||
printf("SAT: ");
|
||||
for (u32 k = 0; k < c->cnts[0]; ++k) {
|
||||
u32 vind = (c->cnts[0] - 1) - k;
|
||||
u32 iind = vind >> 5U;
|
||||
u32 bind = vind & 0b11111U;
|
||||
u8 par = (solution[iind + 1] >> bind) & 1U;
|
||||
printf("%u", par);
|
||||
}
|
||||
printf("\n");
|
||||
if (CHECKASGN) {
|
||||
u8* assigncheck = calloc(c->clausecnt, sizeof(u8));
|
||||
for (u32 i = 0; i < c->litcnt; ++i) {
|
||||
u32 g = ((c->varcnt - 1) - c->variables[i]) >> 5U;
|
||||
u32 h = ((c->varcnt - 1) - c->variables[i]) & 0b11111U;
|
||||
u8 paract = (solution[g + 1] >> h) & 1U;
|
||||
if (c->pars[i] == paract) assigncheck[c->clauses[i]] = true;
|
||||
}
|
||||
for (u32 i = 0; i < c->clausecnt; ++i) {
|
||||
if (!assigncheck[i]) {
|
||||
printf("Failed assignment check\n");
|
||||
solution[0] = 4;
|
||||
u8 checkres = 0;
|
||||
for (u32 i = 0; i < c->cnts[1]; ++i) {
|
||||
checkres = 0;
|
||||
for (u32 j = 0; j < c->clausedat[3 * i + 1]; ++j) {
|
||||
u32 v = c->variables[c->clausedat[3 * i] + j];
|
||||
u32 vv = c->cnts[0] - 1;
|
||||
u32 g = (vv - v) >> 5U;
|
||||
u32 h = (vv - v) & 0b11111U;
|
||||
u8 paract = (solution[g + 1] >> h) & 1U;
|
||||
if (c->parities[c->clausedat[3 * i] + j] == paract) {
|
||||
checkres = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!checkres) break;
|
||||
}
|
||||
if (checkres) {
|
||||
printf(" \xE2\x9C\x93\n");
|
||||
} else {
|
||||
printf(" -\n");
|
||||
}
|
||||
free(assigncheck);
|
||||
printf("Passed assignment check\n");
|
||||
}
|
||||
} else {
|
||||
printf("What the fuck???\n");
|
||||
@@ -248,16 +623,14 @@ i32 gpusolve(cnf* c) {
|
||||
res = clReleaseMemObject(gpuheader);
|
||||
res = clReleaseMemObject(gpulvars);
|
||||
res = clReleaseMemObject(gpuvariables);
|
||||
res = clReleaseMemObject(gpuclauses);
|
||||
res = clReleaseMemObject(gpuparities);
|
||||
res = clReleaseMemObject(gpuoutput);
|
||||
res = clReleaseMemObject(gpuscratchpad);
|
||||
|
||||
res = clReleaseDevice(deviceid);
|
||||
|
||||
i32 retval = solution[0];
|
||||
i32 retval = (i32) solution[0];
|
||||
free(solution);
|
||||
free(source_str);
|
||||
|
||||
return retval;
|
||||
}
|
||||
}
|
||||
|
||||
25
gpusolver.h
25
gpusolver.h
@@ -1,4 +1,27 @@
|
||||
#pragma once
|
||||
#include "cnf.h"
|
||||
#include "ncnf.h"
|
||||
#include <CL/cl.h>
|
||||
|
||||
typedef struct {
|
||||
cl_platform_id platformid;
|
||||
cl_device_id deviceid;
|
||||
cl_uint numdevices;
|
||||
cl_uint numplatforms;
|
||||
char* source_str;
|
||||
size_t source_size;
|
||||
cl_context ctx;
|
||||
cl_command_queue commqueue;
|
||||
cl_program program;
|
||||
cl_kernel kernel;
|
||||
u64 gpuMemoryMax;
|
||||
u64 gpuLocalMax;
|
||||
u64 gpuAllocMax;
|
||||
u64 gpuCUs;
|
||||
} gpusolver;
|
||||
|
||||
gpusolver* initSolver();
|
||||
i32 gpusolve2(gpusolver* gs, cnf* c);
|
||||
void freeSolver(gpusolver* gs);
|
||||
|
||||
|
||||
i32 gpusolve(cnf* c);
|
||||
117
main.c
117
main.c
@@ -1,10 +1,92 @@
|
||||
#include <stdio.h>
|
||||
#include "cnf.h"
|
||||
#include "gpusolver.h"
|
||||
#include "time.h"
|
||||
#include "tests/masterTest.h"
|
||||
|
||||
#include "gmp.h"
|
||||
#include "rng.h"
|
||||
#include "ncnf.h"
|
||||
#include "cpusolver.h"
|
||||
|
||||
|
||||
|
||||
void printbits(unsigned a) {
|
||||
for (unsigned i = 0; i < 32; ++i) {
|
||||
unsigned ind = 31 - i;
|
||||
printf("%u", (a >> ind) & 1U);
|
||||
}
|
||||
}
|
||||
|
||||
#define TESTS (274877906944LU >> 10U)
|
||||
#define CSZE (83LU)
|
||||
#define eqprob (0.01f)
|
||||
|
||||
|
||||
void mul(u32* c, u32 len, u32* a, u32 b) {
|
||||
u32 carry = 0;
|
||||
for (u32 i = 0; i < len; ++i) {
|
||||
u32 ncarry;
|
||||
u32 blo = a[i] & 0xFFFFU;
|
||||
u32 bhi = a[i] >> 16U;
|
||||
u32 ilo = b & 0xFFFFU;
|
||||
u32 ihi = b >> 16U;
|
||||
|
||||
*(c + i) = ilo * blo;
|
||||
u32 b1 = ilo * bhi;
|
||||
u32 c1 = ihi * blo;
|
||||
ncarry = ihi * bhi;
|
||||
|
||||
b1 += c1;
|
||||
ncarry += (b1 < c1) << 16U;
|
||||
u32 bblo = b1 & 0xFFFFU;
|
||||
u32 bbhi = b1 >> 16U;
|
||||
bblo <<= 16U;
|
||||
*(c + i) += bblo;
|
||||
u8 acarry = *(c + i) < bblo;
|
||||
|
||||
ncarry += bbhi + acarry;
|
||||
c[i] += carry;
|
||||
ncarry += c[i] < carry;
|
||||
carry = ncarry;
|
||||
}
|
||||
}
|
||||
|
||||
i32 runuf20lol() {
|
||||
u32 passed = 0;
|
||||
u64 tottime = 0;
|
||||
for (u32 i = 0; i < 1000; ++i) {
|
||||
char buf[128];
|
||||
i32 len = sprintf(buf, "/home/lev/Downloads/uf20/uf20-0%u.cnf", i + 1);
|
||||
|
||||
debugsolve(buf);
|
||||
|
||||
passed++;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
i32 runuf50lol() {
|
||||
u32 passed = 0;
|
||||
u64 tottime = 0;
|
||||
for (u32 i = 0; i < 1000; ++i) {
|
||||
char buf[128];
|
||||
i32 len = sprintf(buf, "/home/lev/Downloads/uf50/uf50-0%u.cnf", i + 1);
|
||||
|
||||
debugsolve(buf);
|
||||
|
||||
passed++;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
int main() {
|
||||
// debugsolve("/home/lev/Downloads/uf50/uf50-01.cnf");
|
||||
|
||||
|
||||
// runuf20lol();
|
||||
// runTests();
|
||||
|
||||
/*
|
||||
srand( utime());
|
||||
u32 cnt = 4096;
|
||||
@@ -28,7 +110,35 @@ int main() {
|
||||
*/
|
||||
|
||||
/* Expects a path to a DIMACS file */
|
||||
cnf* c = readDIMACS("/home/lev/Downloads/uf50/uf50-088.cnf");
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
cnf* c = readDIMACS("/home/heka/Downloads/uf20/uf20-03.cnf");
|
||||
sortlastnum(c);
|
||||
|
||||
// gpusolve(c);
|
||||
|
||||
|
||||
gpusolver* gs = initSolver();
|
||||
if (gs == NULL) return -1;
|
||||
|
||||
i32 res = gpusolve2(gs, c);
|
||||
|
||||
freeSolver(gs);
|
||||
|
||||
freecnf(c);
|
||||
|
||||
return 0;
|
||||
|
||||
|
||||
// runTests();
|
||||
// return 0;
|
||||
/*
|
||||
|
||||
// printf("%u\n", c->litcnt);
|
||||
|
||||
// for (u32 i = 0; i < c->clausecnt; ++i) printf("%u ", c->lastvars[i]);
|
||||
@@ -46,5 +156,6 @@ int main() {
|
||||
// if (res == 1) break;
|
||||
freecnf(c);
|
||||
|
||||
return 0;
|
||||
return 0;*/
|
||||
|
||||
}
|
||||
|
||||
238
ncnf.c
Normal file
238
ncnf.c
Normal file
@@ -0,0 +1,238 @@
|
||||
#include "ncnf.h"
|
||||
|
||||
cnf* readDIMACS(char* path) {
|
||||
cnf* c = malloc(sizeof(cnf));
|
||||
CHECK(c, "Failed to alloc CNF struct\n")
|
||||
|
||||
FILE* f = fopen(path, "r");
|
||||
CHECK(f, "Failed to open file\n")
|
||||
|
||||
u32 bufsize = 1000;
|
||||
char* buf = malloc(sizeof(char) * bufsize);
|
||||
CHECK(buf, "Failed to alloc read buffer\n")
|
||||
|
||||
CHECK(fgets(buf, sizeof(char) * bufsize, f), "Failed to read file\n")
|
||||
|
||||
// 'c' marks the beginning of a comment line in DIMACs. Skip this line
|
||||
while (buf[0] == 'c') {
|
||||
CHECK(fgets(buf, sizeof(char) * bufsize, f), "Failed to read file\n")
|
||||
}
|
||||
|
||||
// The first non-comment line is the header, should have format 'p cnf [varcnt] [clausecnt]
|
||||
// We skip the first bit by iterating and skipping any character that's p, c, n, f, or space
|
||||
char* temp = buf;
|
||||
while (*temp == ' ' || *temp == 'p' || *temp == 'c' || *temp == 'n' || *temp == 'f') {
|
||||
temp++;
|
||||
}
|
||||
|
||||
// Char by char we read in the number of variables
|
||||
u32* varcnt = &(c->cnts[0]);
|
||||
*varcnt = 0;
|
||||
while (((u8) (*temp - '0')) < 10) {
|
||||
*varcnt *= 10;
|
||||
*varcnt += (*temp - '0');
|
||||
temp++;
|
||||
}
|
||||
|
||||
// Skip any trailing whitespace
|
||||
while (*temp == ' ') temp++;
|
||||
|
||||
// Read in clausecnt
|
||||
u32* clausecnt = &(c->cnts[1]);
|
||||
*clausecnt = 0;
|
||||
while (((u8) (*temp - '0')) < 10) {
|
||||
*clausecnt *= 10;
|
||||
*clausecnt += (*temp - '0');
|
||||
temp++;
|
||||
}
|
||||
|
||||
// Resize line buffer: the maximum clause size is varcnt, the maximum size of a u32 in decimal
|
||||
// is 10, and conservatively given 2 extra characters for the - symbol and space delimiting,
|
||||
// resizing the buffer to this should mean we never encounter a clause we fail to read.
|
||||
u32 nsize = sizeof(char) * ((*varcnt) * 12LU) + 10;
|
||||
char* nbuf = realloc(buf, nsize);
|
||||
CHECK(nbuf, "Failed to realloc read buffer\n")
|
||||
|
||||
buf = nbuf;
|
||||
|
||||
u32 cnt = 0;
|
||||
u32 cap = 32;
|
||||
|
||||
c->clausedat = calloc(*clausecnt, sizeof(u32) * 3);
|
||||
CHECK(c->clausedat, "Failed to allocate clause data\n")
|
||||
c->index = calloc(*varcnt, sizeof(u32));
|
||||
CHECK(c->index, "Failed to allocate clause index\n")
|
||||
c->variables = calloc(cap, sizeof(u32));
|
||||
CHECK(c->variables, "Failed to allocate literal variables\n")
|
||||
c->parities = calloc(cap, sizeof(u8));
|
||||
CHECK(c->parities, "Failed to allocate literal parities\n");
|
||||
|
||||
for (u32 i = 0; i < *clausecnt; ++i) {
|
||||
c->clausedat[3 * i] = cnt;
|
||||
CHECK(fgets(buf, nsize, f), "Failed to read clause\n");
|
||||
|
||||
temp = buf;
|
||||
while (*temp == ' ') temp++;
|
||||
|
||||
bool empty = true;
|
||||
bool tr = true;
|
||||
while (*temp != '\n') {
|
||||
if (cnt == cap) {
|
||||
// Out of space to add more literals, realloc arrays
|
||||
u32 ncap = cap << 1U;
|
||||
c->variables = realloc(c->variables, sizeof(u32) * ncap);
|
||||
memset(c->variables + cap, 0, sizeof(u32) * cap);
|
||||
CHECK(c->variables, "Failed to realloc variable array\n");
|
||||
c->parities = realloc(c->parities, sizeof(u8) * ncap);
|
||||
memset(c->parities + cap, 0, sizeof(u8) * cap);
|
||||
CHECK(c->parities, "Failed to realloc parity array\n");
|
||||
cap = ncap;
|
||||
}
|
||||
|
||||
if (*temp == '-') {
|
||||
tr = false;
|
||||
} else if (((u8) (*temp - '0')) < 10) {
|
||||
// Read in the literal's digits
|
||||
c->variables[cnt] *= 10;
|
||||
c->variables[cnt] += (*temp - '0');
|
||||
} else {
|
||||
// Skip any whitespace and pack the read value into the arrays, along with
|
||||
// any additional data
|
||||
while (temp[1] == ' ') temp++;
|
||||
c->parities[cnt] = tr;
|
||||
c->variables[cnt] -= 1;
|
||||
if (c->clausedat[3 * i + 2] < c->variables[cnt]) c->clausedat[3 * i + 2] = c->variables[cnt];
|
||||
empty = false;
|
||||
tr = true;
|
||||
cnt++;
|
||||
if (temp[1] == '0') {
|
||||
break;
|
||||
}
|
||||
}
|
||||
temp++;
|
||||
}
|
||||
|
||||
if (empty) {
|
||||
printf("UNSAT: Empty clause %u\n", i);
|
||||
return NULL;
|
||||
}
|
||||
c->clausedat[3 * i + 1] = cnt - c->clausedat[3 * i];
|
||||
}
|
||||
|
||||
// Lastvars is set to the index of the last variable of each clause. However, because in the
|
||||
// counter, the last variable has the lsb position, we flip the value of lastvars
|
||||
for (u32 i = 0; i < *clausecnt; ++i) {
|
||||
c->clausedat[3 * i + 2] += 1U;
|
||||
c->clausedat[3 * i + 2] = *varcnt - c->clausedat[3 * i + 2];
|
||||
}
|
||||
|
||||
// Realloc the arrays to exactly match the number of literals
|
||||
c->variables = realloc(c->variables, sizeof(u32) * cnt);
|
||||
c->parities = realloc(c->parities, sizeof(u8) * cnt);
|
||||
c->cnts[2] = cnt;
|
||||
free(buf);
|
||||
if (fclose(f)) {
|
||||
printf("Failed to close file\n");
|
||||
return NULL;
|
||||
}
|
||||
return c;
|
||||
}
|
||||
|
||||
void printcnf(cnf* c) {
|
||||
printf("p cnf %u %u\n", c->cnts[0], c->cnts[1]);
|
||||
for (u32 i = 0; i < c->cnts[1]; ++i) {
|
||||
for (u32 j = 0; j < c->clausedat[3 * i + 1]; ++j) {
|
||||
if (c->parities[c->clausedat[3 * i] + j] == 0) printf("-");
|
||||
printf("%u ", c->variables[c->clausedat[3 * i] + j] + 1);
|
||||
}
|
||||
printf("0\n");
|
||||
}
|
||||
}
|
||||
|
||||
void freecnf(cnf* c) {
|
||||
free(c->clausedat);
|
||||
free(c->variables);
|
||||
free(c->parities);
|
||||
free(c);
|
||||
}
|
||||
|
||||
const u64 RBITS = 8;
|
||||
const u64 RSIZE = 1LU << RBITS;
|
||||
const u64 RLVLS = (31LU / RBITS) + 1;
|
||||
const u64 RMASK = RSIZE - 1;
|
||||
|
||||
void sortlastnum(cnf* c) {
|
||||
bool v = false;
|
||||
u32* d = calloc(c->cnts[1], sizeof(u32) * 3);
|
||||
|
||||
u32 qb[RLVLS * RSIZE];
|
||||
u32 qe[RLVLS * RSIZE];
|
||||
memset(qb, 0, sizeof(u32) * RLVLS * RSIZE);
|
||||
memset(qe, 0, sizeof(u32) * RLVLS * RSIZE);
|
||||
|
||||
for (u32 pass = 0; pass < RLVLS; ++pass) {
|
||||
u32 shift = pass * RBITS;
|
||||
|
||||
for (u32 i = 0; i < c->cnts[1]; ++i) {
|
||||
u32 val = UINT32_MAX - c->clausedat[3 * i + 2];
|
||||
u32 ind = (val >> shift) & RMASK;
|
||||
qe[pass * RSIZE + ind]++;
|
||||
}
|
||||
|
||||
u32 uc = 0;
|
||||
for (u32 i = 0; i < RSIZE; ++i) if (qe[pass * RSIZE + i]) uc++;
|
||||
if (uc == 1) continue;
|
||||
|
||||
qb[pass * RSIZE] = 0;
|
||||
for (u32 i = 1; i < RSIZE; ++i) {
|
||||
qb[pass * RSIZE + i] = qb[pass * RSIZE + i - 1] + qe[pass * RSIZE + i - 1];
|
||||
}
|
||||
|
||||
for (u32 i = 0; i < c->cnts[1]; ++i) {
|
||||
u32 val = UINT32_MAX - c->clausedat[3 * i + 2];
|
||||
u32 ind = (val >> shift) & RMASK;
|
||||
memcpy(d + 3 * qb[pass * RSIZE + ind], c->clausedat + 3 * i, sizeof(u32) * 3);
|
||||
qb[pass * RSIZE + ind]++;
|
||||
__builtin_prefetch(d + 3 * qb[pass * RSIZE + ind] + 1);
|
||||
}
|
||||
|
||||
u32* tptr = c->clausedat;
|
||||
c->clausedat = d;
|
||||
d = tptr;
|
||||
v = !v;
|
||||
}
|
||||
|
||||
if (v) {
|
||||
u32* tptr = c->clausedat;
|
||||
c->clausedat = d;
|
||||
d = tptr;
|
||||
memcpy(c->clausedat, d, sizeof(u32) * 3 * c->cnts[1]);
|
||||
}
|
||||
|
||||
free(d);
|
||||
|
||||
// TODO: Rewrite, the following is copied from Gábor Kusper's implementation
|
||||
u32 lastNum = 0;
|
||||
for (u32 i = c->cnts[1] - 1; i < c->cnts[1]; --i) {
|
||||
if (c->clausedat[3 * i + 2] > lastNum) {
|
||||
while (lastNum < c->clausedat[3 * i + 2]) {
|
||||
c->index[lastNum] = i + 1;
|
||||
lastNum++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
u32 corrInd = 0;
|
||||
while (c->index[corrInd] == c->cnts[1]) {
|
||||
corrInd++;
|
||||
}
|
||||
|
||||
if (corrInd != 0) {
|
||||
u32 goodVal = c->index[corrInd];
|
||||
do {
|
||||
corrInd--;
|
||||
c->index[corrInd] = goodVal;
|
||||
} while (corrInd != 0);
|
||||
}
|
||||
}
|
||||
30
ncnf.h
Normal file
30
ncnf.h
Normal file
@@ -0,0 +1,30 @@
|
||||
#pragma once
|
||||
#include "types.h"
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#define CHECK(X, Y) if (X == NULL) { \
|
||||
printf(Y); \
|
||||
return NULL; \
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
u32 cnts[3]; // { varcnt, clausecnt }
|
||||
u32* clausedat; // { ind, len, jval }
|
||||
u32* index;
|
||||
u32* variables;
|
||||
u8* parities;
|
||||
} cnf;
|
||||
|
||||
cnf* readDIMACS(char* path);
|
||||
|
||||
void printcnf(cnf* c);
|
||||
|
||||
void sortlastnum(cnf* c);
|
||||
|
||||
void freecnf(cnf* c);
|
||||
|
||||
/* -mavx2 -O3 -ftree-loop-linear -ftree-loop-im -ftree-loop-ivcanon -fivopts -ftree-vectorize -ftracer -funroll-all-loops
|
||||
*
|
||||
*/
|
||||
196
psat.cl
196
psat.cl
@@ -32,120 +32,122 @@ static inline void stateaddpow(uint wcnt, uint* state, uint pow) {
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void vectorSAT(__global const uint* cnfheader, __global const uint* lvars, __global const uint* vars, __global const uint* clauses, __global const uchar* pars, __global uint* output, __global uchar* scratchpad, __local uint* maxvals) {
|
||||
output[0] = 2;
|
||||
void mul(uint* c, uint len, uint* a, uint b) {
|
||||
uint carry = 0;
|
||||
for (uint i = 0; i < len; ++i) {
|
||||
uint ncarry;
|
||||
uint blo = a[i] & 0xFFFFU;
|
||||
uint bhi = a[i] >> 16U;
|
||||
uint ilo = b & 0xFFFFU;
|
||||
uint ihi = b >> 16U;
|
||||
|
||||
__local uint setmax;
|
||||
*(c + i) = ilo * blo;
|
||||
uint b1 = ilo * bhi;
|
||||
uint c1 = ihi * blo;
|
||||
ncarry = ihi * bhi;
|
||||
|
||||
uint cnt = cnfheader[0];
|
||||
uint vcnt = cnfheader[1];
|
||||
uint ccnt = cnfheader[2];
|
||||
b1 += c1;
|
||||
ncarry += (b1 < c1) << 16U;
|
||||
uint bblo = b1 & 0xFFFFU;
|
||||
uint bbhi = b1 >> 16U;
|
||||
bblo <<= 16U;
|
||||
*(c + i) += bblo;
|
||||
uchar acarry = *(c + i) < bblo;
|
||||
|
||||
uint wcnt = 1 + (vcnt >> 5U);
|
||||
ncarry += bbhi + acarry;
|
||||
c[i] += carry;
|
||||
ncarry += c[i] < carry;
|
||||
carry = ncarry;
|
||||
}
|
||||
}
|
||||
|
||||
uint maxctr = 1U << (vcnt & 0b11111U);
|
||||
|
||||
//uint glbid = get_global_id(0);
|
||||
//uint glbsz = get_global_size(0);
|
||||
|
||||
|
||||
uint locid = get_local_id(0);
|
||||
uint locsz = get_local_size(0);
|
||||
__kernel void vectorSAT(__global const uint* cnfhdr, __global const uint* clausedat, __global const uint* vars, __global const uchar* pars, __global uint* output, __global uint* lctrs) {
|
||||
// uint grpid = get_group_id(0);
|
||||
// uint grpcn = get_num_groups(0);
|
||||
uint globid = get_global_id(0);
|
||||
uint globsz = get_global_size(0);
|
||||
|
||||
// Zero out the counter
|
||||
for (uint i = 0; i < wcnt; ++i) output[i + 1] = 0;
|
||||
uint wcnt = 1U + (cnfhdr[0] >> 5U);
|
||||
|
||||
// Set all scratchpad clauses to true
|
||||
for (uint j = 0; j < ccnt; j += locsz) {
|
||||
uchar cond = (j + locid) < ccnt;
|
||||
j = j * cond + (!cond) * (ccnt - locid - 1);
|
||||
scratchpad[j + locid] = 1;
|
||||
uint mode = 2;
|
||||
uint index = 0;
|
||||
uint addval = 0;
|
||||
|
||||
//uint ctroff =
|
||||
uint* ctr = lctrs + wcnt * 2 * globid;
|
||||
uint* max = lctrs + wcnt * (2 * globid + 1);
|
||||
|
||||
for (uint i = 0; i < wcnt; ++i) {
|
||||
ctr[i] = max[i] = 0;
|
||||
}
|
||||
|
||||
__local uint firstind[1];
|
||||
while (output[0] == 2) {
|
||||
firstind[0] = ccnt;
|
||||
|
||||
setmax = 0;
|
||||
uint maxnumx = 0;
|
||||
|
||||
for (uint j = 0; j < cnt; j += locsz) {
|
||||
|
||||
uchar cond = (j + locid) < cnt;
|
||||
// Last element cap
|
||||
j = j * cond + (!cond) * (cnt - locid - 1);
|
||||
uint varind = vars[j + locid];
|
||||
varind = (vcnt - 1) - varind;
|
||||
uint iind = varind >> 5U;
|
||||
uint bind = varind & 0b11111U;
|
||||
uchar cpar = (output[iind + 1] >> bind) & 1U;
|
||||
|
||||
if (cpar != pars[j + locid]) {
|
||||
scratchpad[clauses[j + locid]] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE);
|
||||
mul(ctr, wcnt, output + 1, globid);
|
||||
if (globid == globsz - 1) {
|
||||
stateaddpow(wcnt, max, cnfhdr[0]);
|
||||
} else {
|
||||
mul(max, wcnt, output + 1, globid + 1);
|
||||
}
|
||||
//printf("%u %u\n", ctr[0], max[0]);
|
||||
//printf("%u %u\n", wcnt * 2 * globid, wcnt * (2 * globid + 1));
|
||||
|
||||
|
||||
for (uint j = 0; j < ccnt; j += locsz) {
|
||||
if (scratchpad[j + locid] == 1 && (j + locid) < ccnt) {
|
||||
setmax = 1;
|
||||
}
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE);
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
uint varcnt = cnfhdr[0] - 1;
|
||||
while (output[0] == 0) {
|
||||
|
||||
if (setmax) {
|
||||
// Set maxval array to zero
|
||||
maxvals[locid] = 0;
|
||||
uint chkmsk = 0xFFFFFFFFU * (mode == 2U);
|
||||
uint chkcls = index & chkmsk;
|
||||
uint chkind = clausedat[3 * chkcls] + (addval & chkmsk);
|
||||
uint var = vars[chkind];
|
||||
uchar par = pars[chkind];
|
||||
uint vword = (varcnt - var) >> 5U;
|
||||
uint vbit = (varcnt - var) & 0b11111U;
|
||||
uchar corpar = (ctr[vword] >> vbit) & 1U;
|
||||
uchar isvalid = (par == corpar);
|
||||
uchar islvar = ((addval + 1) == clausedat[3 * chkcls + 1]);
|
||||
uchar isbchk0 = (mode == 2U);
|
||||
uchar isbchk1 = isbchk0 & isvalid;
|
||||
uchar isbchk2 = isbchk1 & islvar;
|
||||
uint j = clausedat[3 * chkcls + 2];
|
||||
mode -= 2 * isbchk2;
|
||||
//if (isbchk2 && globid == 0) printf("%u j: %u\n", globid, j);
|
||||
index = (j >> 5U) * isbchk2 + index * (!isbchk2);
|
||||
addval = (1U << (j & 0b11111U)) * isbchk2 + addval * (!isbchk2);
|
||||
addval += ((isbchk1) & (!islvar));
|
||||
uchar isbchk3 = (isbchk0 & (!isvalid));
|
||||
addval *= (!isbchk3);
|
||||
index += (isbchk3);
|
||||
uchar issat = (index == cnfhdr[1]) * (isbchk3);
|
||||
|
||||
// Accumulate and reduce the maximums
|
||||
for (uint j = 0; j < ccnt; j += locsz) {
|
||||
//uint a = maxvals[locid];
|
||||
//uint b = lvars[j + locid];
|
||||
// uint c = max(a, b);
|
||||
if ((j + locid) < ccnt && scratchpad[j + locid] == 1) {
|
||||
//maxvals[locid] = c;
|
||||
atomic_min(firstind, (j + locid));
|
||||
uint cmpaddind = index * (mode != 2U);
|
||||
uint nval = ctr[cmpaddind] + addval; // Find the result of the current step if it was addition
|
||||
addval = (nval < ctr[cmpaddind]) * (mode == 0) + (addval) * (mode == 2U); // If in add mode, set addval to carry. If in cmp mode, set to 0. If in check mode, leave alone.
|
||||
ctr[cmpaddind] = nval * ((mode == 0) & !issat) + ctr[cmpaddind] * ((mode != 0) | issat); // If in add mode, set new ctr val, otherwise leave unchanged
|
||||
addval -= (ctr[cmpaddind] < max[cmpaddind]) * (mode == 1); // If in comparison mode, decrement addval if less than
|
||||
addval += (ctr[cmpaddind] > max[cmpaddind]) * (mode == 1); // If in comparison mode, increment addval if greater than
|
||||
uchar addcond = (addval == 0) | (cmpaddind == (wcnt - 1)); // Exit condition for the ADD state: If addval is zero (no carry) or we're at the last word
|
||||
uchar cmpcond = (addval != 0) | (cmpaddind == 0); // Exit condition for the CMP state: if addval is nonzero (lt or gt) or we're at the least significant word
|
||||
uchar exittime = (mode == 1) & cmpcond & (addval != -1);
|
||||
exittime |= issat;
|
||||
if (exittime) { // If in cmpmode and the comparison result is not less than, unsat
|
||||
if (issat) {
|
||||
if (atomic_cmpxchg(output, 0, 1) == 0) {
|
||||
for (uint i = 0; i < wcnt; ++i) {
|
||||
output[i + 1] = ~ctr[i];
|
||||
}
|
||||
output[0] = 1;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
uint maxj = lvars[firstind[0]];
|
||||
|
||||
// Set all scratchpad clauses to true
|
||||
for (uint j = 0; j < ccnt; j += locsz) {
|
||||
uchar cond = (j + locid) < ccnt;
|
||||
j = j * cond + (!cond) * (ccnt - locid - 1);
|
||||
scratchpad[j + locid] = 1;
|
||||
}
|
||||
|
||||
// Final reduction pass
|
||||
/*
|
||||
uint maxj = maxvals[0];
|
||||
for (uint j = 1; j < locsz; ++j) {
|
||||
maxj = max(maxj, maxvals[j]);
|
||||
}
|
||||
*/
|
||||
|
||||
// Add to the counter
|
||||
if (locid == 0) {
|
||||
stateaddpow(wcnt, output + 1, maxj);
|
||||
}
|
||||
|
||||
if (output[wcnt] >= maxctr) {
|
||||
output[0] = 1;
|
||||
}
|
||||
} else {
|
||||
output[0] = 0;
|
||||
if (locid == 0) {
|
||||
for (uint i = 0; i < wcnt; ++i) output[i + 1] = ~output[i + 1];
|
||||
}
|
||||
//if (globid == 0) printf("fuck %u %u\n", ctr[0], max[0]);
|
||||
break;
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
uchar cmpdone = cmpcond & (mode == 1); // if comparison completion conditions are satisfied and in CMP mode
|
||||
uint addindex = (cmpaddind + 1) * !addcond + (wcnt - 1) * addcond; // if add completion is satisfied, set index to most significant word, else increment by 1
|
||||
index = addindex * (mode == 0) + (index - (mode == 1)) * (mode != 0); // If in add mode, use addindex; if in cmp mode, decrement index by 1
|
||||
index *= !cmpdone;
|
||||
addval *= !(((addcond) & (mode == 0)) | cmpdone); // If add is complete, or cmp is complete, zero. Else leave unchanged.
|
||||
mode += addcond * (mode == 0) + cmpdone; // If in add mode and add completion is reached, increment mode. If in cmp mode and cmp completion reached, increment mode.
|
||||
}
|
||||
}
|
||||
278
psatv0.cl
Normal file
278
psatv0.cl
Normal file
@@ -0,0 +1,278 @@
|
||||
|
||||
#pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable
|
||||
|
||||
#define DEBUG
|
||||
|
||||
#ifdef DEBUG
|
||||
#define DBG(X) printf("%s\n", (X))
|
||||
#else
|
||||
#define DBG(X) do {} while (0)
|
||||
#endif
|
||||
|
||||
static inline void printbits(uint a) {
|
||||
for (uint i = 0; i < 32; ++i) {
|
||||
uint ind = 31 - i;
|
||||
printf("%u", (a >> ind) & 1U);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void stateaddpow(uint wcnt, uint* state, uint pow) {
|
||||
uint corpow = pow & 0b11111U;
|
||||
uint startind = pow >> 5U;
|
||||
uint tr = 1U << corpow;
|
||||
uint tval = state[startind] + tr;
|
||||
bool carry = tval < state[startind];
|
||||
state[startind] = tval;
|
||||
if (carry) {
|
||||
uint i = startind + 1;
|
||||
for ( ; i < wcnt; ++i) {
|
||||
state[i]++;
|
||||
if (state[i]) break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
static inline void stateaddpow(uint wcnt, uint* state, uint pow) {
|
||||
uint corpow = pow & 0b11111U;
|
||||
uint startind = pow >> 5U;
|
||||
uint tr = 1U << corpow;
|
||||
uint tval = state[startind] + tr;
|
||||
bool carry = tval < state[startind];
|
||||
uint i = startind + 1;
|
||||
for ( ; i < wcnt; ++i) {
|
||||
state[i] = carry + state[i];
|
||||
carry = (state[i] == 0) * carry;
|
||||
}
|
||||
|
||||
//if (carry) {
|
||||
//uint i = startind + 1;
|
||||
//for ( ; i < wcnt; ++i) {
|
||||
//state[i]++;
|
||||
//if (state[i]) break;
|
||||
//}
|
||||
//}
|
||||
|
||||
}
|
||||
*/
|
||||
|
||||
|
||||
|
||||
__kernel void vectorSAT(__global const uint* cnfheader, __global const uint* lvars, __global const uint* vars, __global const uint* clauses, __global const uchar* pars, __global uint* output, __global uchar* scratchpad, __local uint* maxvals) {
|
||||
output[0] = 2;
|
||||
|
||||
__local uint setmax;
|
||||
|
||||
uint cnt = cnfheader[0];
|
||||
uint vcnt = cnfheader[1];
|
||||
uint ccnt = cnfheader[2];
|
||||
|
||||
uint wcnt = 1 + (vcnt >> 5U);
|
||||
|
||||
uint maxctr = 1U << (vcnt & 0b11111U);
|
||||
|
||||
uint glbid = get_global_id(0);
|
||||
uint glbsz = get_global_size(0);
|
||||
|
||||
/*
|
||||
uint locid = get_local_id(0);
|
||||
uint locsz = get_local_size(0);
|
||||
uint grpid = get_group_id(0);
|
||||
uint grpcn = get_num_groups(0);
|
||||
*/
|
||||
|
||||
// Zero out the counter
|
||||
for (uint i = 0; i < wcnt; ++i) output[i + 1] = 0;
|
||||
|
||||
// Set all scratchpad clauses to true
|
||||
for (uint j = 0; j < ccnt; j += glbsz) {
|
||||
uchar cond = (j + glbid) < ccnt;
|
||||
// If ptr would go past end of array, set it to last element
|
||||
j = j * cond + (!cond) * (ccnt - glbid - 1);
|
||||
scratchpad[j + glbid] = 1;
|
||||
// if ((j + glbid) < ccnt)
|
||||
}
|
||||
|
||||
bool done = false;
|
||||
__local uint iter;
|
||||
__local uint firstind[1];
|
||||
iter = 0;
|
||||
while (output[0] == 2) {
|
||||
firstind[0] = ccnt;
|
||||
if (glbid == 0) {
|
||||
iter = iter + 1;
|
||||
}
|
||||
// if (glbid == 0) printf("%s\n", ":~");
|
||||
setmax = 0;
|
||||
uint maxnumx = 0;
|
||||
|
||||
for (uint j = 0; j < cnt; j += glbsz) {
|
||||
|
||||
uchar cond = (j + glbid) < cnt;
|
||||
// Last element cap
|
||||
j = j * cond + (!cond) * (cnt - glbid - 1);
|
||||
uint varind = vars[j + glbid];
|
||||
varind = (vcnt - 1) - varind;
|
||||
uint iind = varind >> 5U;
|
||||
uint bind = varind & 0b11111U;
|
||||
uchar cpar = (output[iind + 1] >> bind) & 1U;
|
||||
//uchar cond2 = (cpar != pars[j + glbid]);
|
||||
//uint dirind = cond2 * clauses[j + glbid] + (!cond2) * ccnt;
|
||||
// scratchpad[dirind] = 0;
|
||||
|
||||
if (cpar != pars[j + glbid]) {
|
||||
scratchpad[clauses[j + glbid]] = 0;
|
||||
/*
|
||||
if (clauses[j + glbid] == 50) {
|
||||
printf("50 failed on (%u) %u (%u) %u / %u %u\n", pars[j + glbid], vars[j + glbid], cpar, varind, iind, bind);
|
||||
printbits(output[2]);
|
||||
printbits(output[1]);
|
||||
}
|
||||
*/
|
||||
}
|
||||
|
||||
/*
|
||||
if ((j + glbid) < cnt) {
|
||||
uint varind = vars[j + glbid];
|
||||
varind = (vcnt - 1) - varind;
|
||||
uint iind = varind >> 5U;
|
||||
uint bind = varind & 0b11111U;
|
||||
uchar cpar = (output[iind + 1] >> bind) & 1U;
|
||||
if (cpar != pars[j + glbid]) {
|
||||
scratchpad[clauses[j + glbid]] = 0;
|
||||
// printf("z %u %u\n", j + glbid, clauses[j + glbid]);
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
}
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE);
|
||||
|
||||
|
||||
for (uint j = 0; j < ccnt; j += glbsz) {
|
||||
/*
|
||||
if (((j + glbid) < ccnt) && (scratchpad[j + glbid] == 1)) {
|
||||
setmax = 1;
|
||||
// printf("%u\n", (~output[1]) & 0b11111);
|
||||
// printf("%u%u%u%u%u\n", (output[1] >> 4) & 1U, (output[1] >> 3) & 1U, (output[1] >> 2) & 1U, (output[1] >> 1) & 1U, output[1] & 1U);
|
||||
// printf("%u - %u\n", j + glbid, scratchpad[j + glbid]);
|
||||
}*/
|
||||
|
||||
//uchar cond = (j + glbid) < ccnt;
|
||||
// Last element cap
|
||||
// j = j * cond + (!cond) * (ccnt - glbid - 1);
|
||||
if (scratchpad[j + glbid] == 1 && (j + glbid) < ccnt) {
|
||||
setmax = 1;
|
||||
// printf("s %u %u %u\n", j + glbid, lvars[j + glbid], iter);
|
||||
// printf("c %u %u %u\n", j + glbid, scratchpad[j + glbid], lvars[j + glbid]);
|
||||
}
|
||||
}
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
/*
|
||||
if (glbid == 0) {
|
||||
printf(">> %u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u\n", (output[1] >> 19) & 1U,(output[1] >> 18) & 1U, (output[1] >> 17) & 1U, (output[1] >> 16) & 1U, (output[1] >> 15) & 1U, (output[1] >> 14) & 1U, (output[1] >> 13) & 1U, (output[1] >> 12) & 1U, (output[1] >> 11) & 1U, (output[1] >> 10) & 1U, (output[1] >> 9) & 1U, (output[1] >> 8) & 1U, (output[1] >> 7) & 1U, (output[1] >> 6) & 1U, (output[1] >> 5) & 1U, (output[1] >> 4) & 1U, (output[1] >> 3) & 1U, (output[1] >> 2) & 1U, (output[1] >> 1) & 1U, output[1] & 1U);
|
||||
|
||||
}
|
||||
*/
|
||||
|
||||
|
||||
|
||||
|
||||
if (setmax) {
|
||||
// Set maxval array to zero
|
||||
maxvals[glbid] = 0;
|
||||
|
||||
// Accumulate and reduce the maximums
|
||||
for (uint j = 0; j < ccnt; j += glbsz) {
|
||||
|
||||
uint a = maxvals[glbid];
|
||||
uint b = lvars[j + glbid];
|
||||
uint c = max(a, b);
|
||||
|
||||
//uchar cond = (j + glbid) < ccnt;
|
||||
//uint d = glbid * cond + (!cond) * glbsz;
|
||||
//maxvals[d] = c;
|
||||
if ((j + glbid) < ccnt && scratchpad[j + glbid] == 1) {
|
||||
maxvals[glbid] = c;
|
||||
// printf("min %u\n", j + glbid);
|
||||
// atomic_min(firstind, (j + glbid));
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
if (glbid == 0) {
|
||||
for (uint k = 0; k < ccnt; ++k) {
|
||||
if (scratchpad[k] == 1) {
|
||||
printf("s %u %u\n", k, lvars[k]); //, iter);
|
||||
}
|
||||
}
|
||||
|
||||
printf("z %u %u\n", scratchpad[50], scratchpad[199]);
|
||||
}
|
||||
*/
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
// uint maxj = lvars[firstind[0]];
|
||||
|
||||
// Set all scratchpad clauses to true
|
||||
for (uint j = 0; j < ccnt; j += glbsz) {
|
||||
uchar cond = (j + glbid) < ccnt;
|
||||
// If ptr would go past end of array, set it to last element
|
||||
j = j * cond + (!cond) * (ccnt - glbid - 1);
|
||||
scratchpad[j + glbid] = 1;
|
||||
// if ((j + glbid) < ccnt)
|
||||
}
|
||||
|
||||
// Final reduction pass
|
||||
|
||||
uint maxj = maxvals[0];
|
||||
for (uint j = 1; j < glbsz; ++j) {
|
||||
maxj = max(maxj, maxvals[j]);
|
||||
}
|
||||
|
||||
|
||||
// Add to the counter
|
||||
if (glbid == 0) {
|
||||
// printf("> %u\n", maxj);
|
||||
/*
|
||||
printbits(output[2]);
|
||||
printbits(output[1]);
|
||||
printf("%s", "\n");
|
||||
*/
|
||||
// uint temp = ind;
|
||||
// printf("maxNumx = %u\n", maxj);// firstind[0]);
|
||||
stateaddpow(wcnt, output + 1, maxj);
|
||||
/*
|
||||
printbits(output[2]);
|
||||
printbits(output[1]);
|
||||
printf("%s", "\n");
|
||||
*/
|
||||
/*
|
||||
if (output[2] == 60234U) {
|
||||
printf("%s\n", "YEEDLEY DEET");
|
||||
printbits(output[2]);
|
||||
printbits(output[1]);
|
||||
printf("%s", "\n");
|
||||
}
|
||||
*/
|
||||
|
||||
// printf(">> %u%u%u%u%u\n", (output[1] >> 4) & 1U, (output[1] >> 3) & 1U, (output[1] >> 2) & 1U, (output[1] >> 1) & 1U, output[1] & 1U);
|
||||
}
|
||||
|
||||
// Check counter for overflow
|
||||
if (output[wcnt] >= maxctr) {
|
||||
output[0] = 1;
|
||||
}
|
||||
} else {
|
||||
// SAT. Set status and assignment.
|
||||
output[0] = 0;
|
||||
if (glbid == 0) {
|
||||
for (uint i = 0; i < wcnt; ++i) output[i + 1] = ~output[i + 1];
|
||||
}
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
}
|
||||
}
|
||||
121
rng.c
Normal file
121
rng.c
Normal file
@@ -0,0 +1,121 @@
|
||||
#include "rng.h"
|
||||
#include <string.h>
|
||||
#include "time.h"
|
||||
#include "stdlib.h"
|
||||
|
||||
static inline u64 rotl(const u64 x, i32 k) {
|
||||
return (x << k) | (x >> (64 - k));
|
||||
}
|
||||
|
||||
void seed(rngstate* s, u64 seed) {
|
||||
/* Unrolled & modified version of splitmix64
|
||||
* https://prng.di.unimi.it/splitmix64.c */
|
||||
u64 z = (seed += 0x9e3779b97f4a7c15);
|
||||
z = (z ^ (z >> 30)) * 0xbf58476d1ce4e5b9;
|
||||
z = (z ^ (z >> 27)) * 0x94d049bb133111eb;
|
||||
s->state[0] = z ^ (z >> 31);
|
||||
|
||||
z = (seed += 0x9e3779b97f4a7c15);
|
||||
z = (z ^ (z >> 30)) * 0xbf58476d1ce4e5b9;
|
||||
z = (z ^ (z >> 27)) * 0x94d049bb133111eb;
|
||||
s->state[1] = z ^ (z >> 31);
|
||||
|
||||
z = (seed += 0x9e3779b97f4a7c15);
|
||||
z = (z ^ (z >> 30)) * 0xbf58476d1ce4e5b9;
|
||||
z = (z ^ (z >> 27)) * 0x94d049bb133111eb;
|
||||
s->state[2] = z ^ (z >> 31);
|
||||
|
||||
z = (seed += 0x9e3779b97f4a7c15);
|
||||
z = (z ^ (z >> 30)) * 0xbf58476d1ce4e5b9;
|
||||
z = (z ^ (z >> 27)) * 0x94d049bb133111eb;
|
||||
s->state[3] = z ^ (z >> 31);
|
||||
|
||||
z = (seed += 0x9e3779b97f4a7c15);
|
||||
z = (z ^ (z >> 30)) * 0xbf58476d1ce4e5b9;
|
||||
z = (z ^ (z >> 27)) * 0x94d049bb133111eb;
|
||||
s->state[4] = z ^ (z >> 31);
|
||||
|
||||
z = (seed += 0x9e3779b97f4a7c15);
|
||||
z = (z ^ (z >> 30)) * 0xbf58476d1ce4e5b9;
|
||||
z = (z ^ (z >> 27)) * 0x94d049bb133111eb;
|
||||
s->state[5] = z ^ (z >> 31);
|
||||
|
||||
z = (seed += 0x9e3779b97f4a7c15);
|
||||
z = (z ^ (z >> 30)) * 0xbf58476d1ce4e5b9;
|
||||
z = (z ^ (z >> 27)) * 0x94d049bb133111eb;
|
||||
s->state[6] = z ^ (z >> 31);
|
||||
|
||||
z = (seed + 0x9e3779b97f4a7c15);
|
||||
z = (z ^ (z >> 30)) * 0xbf58476d1ce4e5b9;
|
||||
z = (z ^ (z >> 27)) * 0x94d049bb133111eb;
|
||||
s->state[7] = z ^ (z >> 31);
|
||||
|
||||
s->ready = true;
|
||||
}
|
||||
|
||||
u64 ru64(rngstate* s) {
|
||||
if (!s->ready) seed(s, utime());
|
||||
const u64 res = rotl(s->state[1] * 5, 7) * 9;
|
||||
const u64 t = s->state[1] << 11U;
|
||||
|
||||
s->state[2] ^= s->state[0];
|
||||
s->state[5] ^= s->state[1];
|
||||
s->state[1] ^= s->state[2];
|
||||
s->state[7] ^= s->state[3];
|
||||
s->state[3] ^= s->state[4];
|
||||
s->state[4] ^= s->state[5];
|
||||
s->state[0] ^= s->state[6];
|
||||
s->state[6] ^= s->state[7];
|
||||
|
||||
s->state[6] ^= t;
|
||||
|
||||
s->state[7] = rotl(s->state[7], 21);
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
u64 ru64pow(rngstate* s, u64 max) {
|
||||
u64 v = ru64(s) % max;
|
||||
v++;
|
||||
return max / v;
|
||||
}
|
||||
|
||||
u32 ru32(rngstate* s) {
|
||||
return (u32) ru64(s);
|
||||
}
|
||||
|
||||
f64 rf64(rngstate* s) {
|
||||
return ((f64) ru64(s)) / ((f64) UINT64_MAX);
|
||||
}
|
||||
|
||||
f32 rf32(rngstate* s) {
|
||||
return ((f32) ru32(s)) / ((f32) UINT32_MAX);
|
||||
}
|
||||
|
||||
void jump(rngstate* s) {
|
||||
static const u64 JUMP[] = { 0x33ed89b6e7a353f9, 0x760083d7955323be, 0x2837f2fbb5f22fae, 0x4b8c5674d309511c, 0xb11ac47a7ba28c25, 0xf1be7667092bcc1c, 0x53851efdb6df0aaf, 0x1ebbc8b23eaf25db };
|
||||
u64 t[8];
|
||||
memset(t, 0, sizeof(u64) * 8);
|
||||
for (i32 i = 0; i < 8; ++i) {
|
||||
for (i32 b = 0; b < 64; ++b) {
|
||||
if (JUMP[i] & 1LU << b) for (i32 w = 0; w < 8; ++w) t[w] ^= s->state[w];
|
||||
ru64(s);
|
||||
}
|
||||
}
|
||||
|
||||
memcpy(s->state, t, sizeof(u64) * 8);
|
||||
}
|
||||
|
||||
void longjump(rngstate* s) {
|
||||
static const u64 LONGJUMP[] = { 0x11467fef8f921d28, 0xa2a819f2e79c8ea8, 0xa8299fc284b3959a, 0xb4d347340ca63ee1, 0x1cb0940bedbff6ce, 0xd956c5c4fa1f8e17, 0x915e38fd4eda93bc, 0x5b3ccdfa5d7daca5 };
|
||||
u64 t[8];
|
||||
memset(t, 0, sizeof(u64) * 8);
|
||||
for (i32 i = 0; i < 8; ++i) {
|
||||
for (i32 b = 0; b < 64; ++b) {
|
||||
if (LONGJUMP[i] & 1LU << b) for (i32 w = 0; w < 8; ++w) t[w] ^= s->state[w];
|
||||
ru64(s);
|
||||
}
|
||||
}
|
||||
|
||||
memcpy(s->state, t, sizeof(u64) * 8);
|
||||
}
|
||||
15
rng.h
Normal file
15
rng.h
Normal file
@@ -0,0 +1,15 @@
|
||||
#pragma once
|
||||
#include "types.h"
|
||||
|
||||
typedef struct {
|
||||
bool ready;
|
||||
u64 state[8];
|
||||
} rngstate;
|
||||
|
||||
void seed(rngstate* s, u64 seed);
|
||||
u64 ru64(rngstate* s);
|
||||
u32 ru32(rngstate* s);
|
||||
f64 rf64(rngstate* s);
|
||||
f32 rf32(rngstate* s);
|
||||
void jump(rngstate* s);
|
||||
u64 ru64pow(rngstate* s, u64 max);
|
||||
@@ -3,6 +3,7 @@
|
||||
#include "../cnf.h"
|
||||
#include "../gpusolver.h"
|
||||
#include "../time.h"
|
||||
#include "../cpusolver.h"
|
||||
|
||||
i32 runTests() {
|
||||
i32 res = runuf20();
|
||||
@@ -33,15 +34,15 @@ i32 runuf20() {
|
||||
|
||||
cnf* c = readDIMACS(buf);
|
||||
|
||||
sortlastnum(c, c->litcnt);
|
||||
sortlastnum(c);
|
||||
|
||||
u64 start = utime();
|
||||
i32 res = gpusolve(c);
|
||||
i32 res = cpusolve(c);
|
||||
u64 stop = utime();
|
||||
tottime += (stop - start);
|
||||
|
||||
freecnf(c);
|
||||
if (res == 0) passed++;
|
||||
if (res == 1) passed++;
|
||||
}
|
||||
printf("Passed %u / 1000 tests\n", passed);
|
||||
printf("Took %lf s total, %lf s on avg\n", ((f64) tottime) / 1000000.0, ((f64) tottime) / 1000000000.0);
|
||||
@@ -59,15 +60,15 @@ i32 runuf50() {
|
||||
|
||||
cnf* c = readDIMACS(buf);
|
||||
|
||||
sortlastnum(c, c->litcnt);
|
||||
sortlastnum(c);
|
||||
|
||||
u64 start = utime();
|
||||
i32 res = gpusolve(c);
|
||||
i32 res = cpusolve(c);
|
||||
u64 stop = utime();
|
||||
tottime += (stop - start);
|
||||
|
||||
freecnf(c);
|
||||
if (res == 0) passed++;
|
||||
if (res == 1) passed++;
|
||||
}
|
||||
printf("Passed %u / 1000 tests\n", passed);
|
||||
printf("Took %lf s total, %lf s on avg\n", ((f64) tottime) / 1000000.0, ((f64) tottime) / 1000000000.0);
|
||||
@@ -85,15 +86,15 @@ i32 runuuf50() {
|
||||
|
||||
cnf* c = readDIMACS(buf);
|
||||
|
||||
sortlastnum(c, c->litcnt);
|
||||
sortlastnum(c);
|
||||
|
||||
u64 start = utime();
|
||||
i32 res = gpusolve(c);
|
||||
i32 res = cpusolve(c);
|
||||
u64 stop = utime();
|
||||
tottime += (stop - start);
|
||||
|
||||
freecnf(c);
|
||||
if (res == 1) passed++;
|
||||
if (res == 0) passed++;
|
||||
}
|
||||
printf("Passed %u / 1000 tests\n", passed);
|
||||
printf("Took %lf s total, %lf s on avg\n", ((f64) tottime) / 1000000.0, ((f64) tottime) / 1000000000.0);
|
||||
|
||||
Reference in New Issue
Block a user