Compare commits

3 Commits

Author SHA1 Message Date
gothictomato
3f54ad3f6e Commit most recent local copy 2025-04-05 22:32:13 -04:00
gothictomato
53a580d0ec CSFLOC rewrite
Signed-off-by: gothictomato <gothictomato@pm.me>
2022-09-04 10:52:28 -04:00
gothictomato
e2b633c802 Working to make OpenCL recognize the limitations of the device its on, as well as reusing the OpenCL kernel/context for multiple runs.
Signed-off-by: gothictomato <gothictomato@pm.me>
2022-08-27 09:11:29 -04:00
15 changed files with 6189 additions and 233 deletions

4101
1lit.cnf Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -2,8 +2,8 @@ cmake_minimum_required(VERSION 3.22)
project(psat C)
set(CMAKE_C_STANDARD 99)
# set(CMAKE_C_FLAGS "-mavx2 -O3 -ftree-loop-linear -ftree-loop-im -ftree-loop-ivcanon -fivopts -ftree-vectorize -ftracer -funroll-all-loops ")
set(CMAKE_C_FLAGS "-mavx2 -O3 -ftree-loop-linear -ftree-loop-im -ftree-loop-ivcanon -fivopts -ftree-vectorize")
add_executable(psat main.c cnf.c cnf.h time.c time.h types.h gpusolver.c gpusolver.h cpusolver.c cpusolver.h tests/masterTest.c tests/masterTest.h ncnf.c ncnf.h rng.h rng.c)
add_executable(psat main.c cnf.c cnf.h time.c time.h types.h gpusolver.c gpusolver.h cpusolver.c cpusolver.h tests/masterTest.c tests/masterTest.h ncnf.c ncnf.h rng.h rng.c csflocref.c csflocref.h)
target_link_libraries(psat -lOpenCL -lgmp)

View File

@@ -1 +1,664 @@
#include "cpusolver.h"
#include "csflocref.h"
static inline void stateaddpow(uint wcnt, uint* state, uint pow) {
uint corpow = pow & 0b11111U;
uint startind = pow >> 5U;
uint tr = 1U << corpow;
uint tval = state[startind] + tr;
bool carry = tval < state[startind];
state[startind] = tval;
if (carry) {
uint i = startind + 1;
for ( ; i < wcnt; ++i) {
state[i]++;
if (state[i]) break;
}
}
}
#define DEBUGSAT
#define ADD (0)
#define CMP (1)
#define BCT (2)
#define CHK (3)
void ctrthings3(cnf* c, u32* state, u32* ctr, const u32* max) {
u32 varcnt = c->cnts[0];
u32 clausecnt = c->cnts[1];
u32 wcnt = 1U + (varcnt >> 5U);
u32* mode = state;
u32* index = state + 1;
u32* addval = state + 2;
// printf("> %u %u %u %u %u\n", *mode, *index, *addval, *ctr, *max);
u8 addmode = (*mode == ADD);
u8 cmpmode = (*mode == CMP);
u8 bctmode = (*mode == BCT);
u8 submode = (*mode == CHK);
// 1 mask for if in subcheck mode, 1 for if not
u32 submask = 0xFFFFFFFFU * submode;
u32 nsubmask = ~submask;
u32 addmask = 0xFFFFFFFFU * addmode;
u32 naddmask = ~addmask;
/* subcheck */
// used to find correct ctr index
u32 varcor = varcnt - 1;
// Mask to prevent oob accesses for when (varcnt / 32) > clausecnt (or smth like that)
u32 currclause = *index & submask;
// clausedat records are 3 words long, multiply index by 3
u32 currclauseshifted = currclause * 3U;
// Retrieve beginning index of clause, add current var index
u32 currvarind = c->clausedat[currclauseshifted] + (*addval & submask);
// Retrieve current var & parity
u32 currvar = c->variables[currvarind];
u8 corrpar = c->parities[currvarind];
// Calculate index in ctr
u32 currvarcorr = (varcor - currvar);
u32 currvarword = currvarcorr >> 5U;
u32 currvarbit = currvarcorr & 0b11111U;
/* Can we make there be 1 ctr read per iteration? */
// If in sub mode, retrieve current var's bit, else return ctr[*index]
u32 ctrind = (currvarword & submask) | (*index & nsubmask);
u32 ctrval = ctr[ctrind];
u32 maxval = max[*index & nsubmask];
// Extract parity bit from ctr
u32 currpar = (ctrval >> currvarbit) & 1U;
// Check if assignment parity matches clause parity
u8 subvalid = currpar == corrpar;
// Don't mask addval because it's not being used for a lookup
u8 islvar = c->clausedat[currclauseshifted + 1U] == (*addval + 1);
u32 jval = c->clausedat[currclauseshifted + 2U];
u32 jword = jval >> 5U;
u32 jbit = 1U << (jval & 0b11111U);
u8 islclause = (clausecnt - 1) <= *index;
u8 endOfCtr = (ctrind == (wcnt - 1));
u8 begOfCtr = (ctrind == 0);
/* add */
u32 nctr = ctrval + *addval;
u8 addoverflow = (nctr < ctrval);
/* cmp */
u8 cmpresult = -(ctrval < maxval);
cmpresult += (ctrval > maxval);
u8 cmpnores = cmpresult == 0;
u8 cmpisless = cmpresult == 255U;
/* bitcnt */
u32 bitcntval = (ctrval & -ctrval);
u32 bitcnt = ((bitcntval & 0x0000FFFFU) != 0) << 4U;
bitcnt |= ((bitcntval & 0x00FF00FF) != 0) << 3U;
bitcnt |= ((bitcntval & 0x0F0F0F0F) != 0) << 2U;
bitcnt |= ((bitcntval & 0x33333333) != 0) << 1U;
bitcnt |= (bitcntval & 0x55555555) != 0;
bitcnt += bitcntval != 0;
bitcntval = 32 - bitcnt;
u8 ctrWordIsZero = ctrval == 0;
// Set current ctr word
// If in add mode, update ctr val, otherwise leave unchanged
ctr[ctrind] = (nctr & addmask) | (ctr[ctrind] & naddmask);
// Set current index
/*
* jword : (subcheck & valid & lastvar)
* index : (subcheck & valid & !lastvar) | (add & (!addoverflow | endOfCtr))
* index + 1 : (subcheck & !valid) | (add & (addoverflow & !endOfCtr)) | (bitcnt & ctrWordIsZero & !endOfCtr)
* index - 1 : (cmp & cmpnores & !begOfCtr)
* 0 : (cmp & (!cmpnores | begOfCtr))
* index[addoverflow + bitcntval] : (bitcnt & (!ctrWordIsZero | endOfCtr)
*/
u32 tempA = submode & (subvalid & islvar);
// if (submode & (subvalid & islvar)) fprintf(iofile, "j: %u\n", jval);
u32 tempB = (submode & (subvalid & !islvar));
u32 tempC = (submode & !subvalid) | (addmode & (addoverflow & !endOfCtr)) | (bctmode & (ctrWordIsZero & !endOfCtr));
u32 tempD = (cmpmode & (cmpnores & !begOfCtr));
u32 tempE = (addmode & (!addoverflow | endOfCtr));
u32 tempF = (bctmode & (!ctrWordIsZero | endOfCtr));
tempA *= jword;
tempB *= *index;
tempC *= (*index + 1);
tempD *= (*index - 1);
tempE *= (wcnt - 1);
u32 indexval = (*addval + bitcntval);
u8 indexcmp = indexval < varcnt;
indexval = indexval * indexcmp + (varcnt - 1) * (!indexcmp);
tempF *= c->index[((indexval) * bctmode)];
// if (bctmode & (!ctrWordIsZero | endOfCtr)) fprintf(iofile,"z: %u %u\n", (indexval), c->index[((indexval) * bctmode)]);
*index = tempA | tempB | tempC | tempD | tempE | tempF;
tempA = submode & (subvalid & islvar);
tempB = (submode & (subvalid & !islvar));
tempC = (addmode & (addoverflow & !endOfCtr));
tempD = (bctmode & (ctrWordIsZero & !endOfCtr));
tempA *= (jbit);
tempB *= (*addval + 1);
tempC *= (addoverflow);
tempD *= (*addval + 32);
*addval = tempA | tempB | tempC | tempD;
/*
* 1 << jbit : (subcheck & valid & lastvar)
* addval + 1 : (subcheck & valid & !lastvar)
* 0 : (subcheck & !valid) | (add & (!addoverflow | endOfCtr)) | (cmp) | (bitcnt & (!ctrWordIsZero | endOfCtr)
* addoverflow : (add & (addoverflow & !endOfCtr))
* addoverflow + 32 : (bitcnt & ctrWordIsZero & !endOfCtr)
*
*/
/*
* SAT : (subcheck & !valid & lastclause)
* UNSAT : (cmp & (!cmpnores | begOfCtr)) & !cmpisless
*/
u8 issat = (submode & ((!subvalid) & islclause));
u8 isusat = (cmpmode & ((!cmpnores) | begOfCtr)) & (!cmpisless);
u8 isdone = issat | isusat;
*mode += isdone << 2U;
*index ^= (*index * isdone);
*index += issat;
/*
if (isdone) {
*mode = 9;
*index = issat;
// printf("%u\n", issat);
}
*/
/*
* ADD : (add & (!addoverflow | endOfCtr))
* CMP : (cmp & (!cmpnores | begOfCtr))
* BCT : (bitcnt & (!ctrWordIsZero | endOfCtr))
* CHK : (subcheck & valid & lastvar)
*/
tempA = (addmode & (!addoverflow | endOfCtr));
tempB = (cmpmode & (!cmpnores | begOfCtr));
tempC = (bctmode & (!ctrWordIsZero | endOfCtr));
tempD = (submode & (subvalid & islvar));
*mode += tempA | tempB | tempC;
*mode -= (tempD * 3);
}
void ctrthings4(cnf* c, u32* state, u32* ctr, const u32* max, FILE* iofile) {
u32 varcnt = c->cnts[0];
u32 clausecnt = c->cnts[1];
u32 wcnt = 1U + (varcnt >> 5U);
u32* mode = state;
u32* index = state + 1;
u32* addval = state + 2;
// printf("> %u %u %u %u %u\n", *mode, *index, *addval, *ctr, *max);
u8 addmode = (*mode == ADD);
u8 cmpmode = (*mode == CMP);
u8 bctmode = (*mode == BCT);
u8 submode = (*mode == CHK);
// 1 mask for if in subcheck mode, 1 for if not
u32 submask = -submode;
u32 nsubmask = ~submask;
u32 addmask = -addmode;
u32 naddmask = ~addmask;
/* subcheck */
// used to find correct ctr index
u32 varcor = varcnt - 1;
// Mask to prevent oob accesses for when (varcnt / 32) > clausecnt (or smth like that)
u32 currclause = *index & submask;
// clausedat records are 3 words long, multiply index by 3
u32 currclauseshifted = currclause * 3U;
// Retrieve beginning index of clause, add current var index
u32 currvarind = c->clausedat[currclauseshifted] + (*addval & submask);
// Retrieve current var & parity
u32 currvar = c->variables[currvarind];
u8 corrpar = c->parities[currvarind];
// Calculate index in ctr
u32 currvarcorr = (varcor - currvar);
u32 currvarword = currvarcorr >> 5U;
u32 currvarbit = currvarcorr & 0b11111U;
/* Can we make there be 1 ctr read per iteration? */
// If in sub mode, retrieve current var's bit, else return ctr[*index]
u32 ctrind = (currvarword & submask) | (*index & nsubmask);
u32 ctrval = ctr[ctrind];
u32 maxval = max[*index & nsubmask];
// Extract parity bit from ctr
u32 currpar = (ctrval >> currvarbit) & 1U;
// Check if assignment parity matches clause parity
u8 subvalid = currpar == corrpar;
// Don't mask addval because it's not being used for a lookup
u8 islvar = c->clausedat[currclauseshifted + 1U] == (*addval + 1);
u32 jval = c->clausedat[currclauseshifted + 2U];
u32 jword = jval >> 5U;
u32 jbit = 1U << (jval & 0b11111U);
u8 islclause = (clausecnt - 1) <= *index;
u8 endOfCtr = (ctrind == (wcnt - 1));
u8 begOfCtr = (ctrind == 0);
/* add */
u32 nctr = ctrval + *addval;
u8 addoverflow = (nctr < ctrval);
/* cmp */
u8 cmpresult = -(ctrval < maxval);
cmpresult += (ctrval > maxval);
u8 cmpnores = cmpresult == 0;
u8 cmpisless = cmpresult == 255U;
/* bitcnt */
u32 bitcntval = (ctrval & -ctrval);
u32 bitcnt = ((bitcntval & 0x0000FFFFU) != 0) << 4U;
bitcnt |= ((bitcntval & 0x00FF00FF) != 0) << 3U;
bitcnt |= ((bitcntval & 0x0F0F0F0F) != 0) << 2U;
bitcnt |= ((bitcntval & 0x33333333) != 0) << 1U;
bitcnt |= (bitcntval & 0x55555555) != 0;
bitcnt += bitcntval != 0;
bitcntval = 32 - bitcnt;
u8 ctrWordIsZero = ctrval == 0;
// Set current ctr word
// If in add mode, update ctr val, otherwise leave unchanged
ctr[ctrind] = (nctr & addmask) | (ctr[ctrind] & naddmask);
// Set current index
/*
* jword : (subcheck & valid & lastvar)
* index : (subcheck & valid & !lastvar) | (add & (!addoverflow | endOfCtr))
* index + 1 : (subcheck & !valid) | (add & (addoverflow & !endOfCtr)) | (bitcnt & ctrWordIsZero & !endOfCtr)
* index - 1 : (cmp & cmpnores & !begOfCtr)
* 0 : (cmp & (!cmpnores | begOfCtr))
* index[addoverflow + bitcntval] : (bitcnt & (!ctrWordIsZero | endOfCtr)
*/
u32 tempA = -(submode & (subvalid & islvar));
if (submode & (subvalid & islvar)) fprintf(iofile, "j: %u\n", jval);
u32 tempB = -(submode & (subvalid & !islvar));
u32 tempC = -((submode & !subvalid) | (addmode & (addoverflow & !endOfCtr)) | (bctmode & (ctrWordIsZero & !endOfCtr)));
u32 tempD = -(cmpmode & (cmpnores & !begOfCtr));
u32 tempE = -(addmode & (!addoverflow | endOfCtr));
u32 tempF = -(bctmode & (!ctrWordIsZero | endOfCtr));
tempA &= jword;
tempB &= *index;
tempC &= (*index + 1);
tempD &= (*index - 1);
tempE &= (wcnt - 1);
u32 indexval = (*addval + bitcntval);
u32 indexcmp = -(indexval < varcnt);
indexval = (indexval & indexcmp) | ((varcnt - 1) & (~indexcmp));
tempF &= c->index[((indexval) * bctmode)];
if (bctmode & (!ctrWordIsZero | endOfCtr)) fprintf(iofile,"z: %u %u\n", (indexval), c->index[((indexval) * bctmode)]);
*index = tempA | tempB | tempC | tempD | tempE | tempF;
tempA = -(submode & (subvalid & islvar));
tempB = -(submode & (subvalid & !islvar));
tempC = -(addmode & (addoverflow & !endOfCtr));
tempD = -(bctmode & (ctrWordIsZero & !endOfCtr));
tempA &= (jbit);
tempB &= (*addval + 1);
tempC &= (addoverflow);
tempD &= (*addval + 32);
*addval = tempA | tempB | tempC | tempD;
/*
* 1 << jbit : (subcheck & valid & lastvar)
* addval + 1 : (subcheck & valid & !lastvar)
* 0 : (subcheck & !valid) | (add & (!addoverflow | endOfCtr)) | (cmp) | (bitcnt & (!ctrWordIsZero | endOfCtr)
* addoverflow : (add & (addoverflow & !endOfCtr))
* addoverflow + 32 : (bitcnt & ctrWordIsZero & !endOfCtr)
*
*/
/*
* SAT : (subcheck & !valid & lastclause)
* UNSAT : (cmp & (!cmpnores | begOfCtr)) & !cmpisless
*/
u8 issat = (submode & ((!subvalid) & islclause));
u8 isusat = (cmpmode & ((!cmpnores) | begOfCtr)) & (!cmpisless);
u32 isdone = -(issat | isusat);
*mode |= isdone;
*index ^= (*index & isdone);
*index += issat;
/*
if (isdone) {
*mode = 9;
*index = issat;
// printf("%u\n", issat);
}
*/
/*
* ADD : (add & (!addoverflow | endOfCtr))
* CMP : (cmp & (!cmpnores | begOfCtr))
* BCT : (bitcnt & (!ctrWordIsZero | endOfCtr))
* CHK : (subcheck & valid & lastvar)
*/
tempA = (addmode & (!addoverflow | endOfCtr));
tempB = (cmpmode & (!cmpnores | begOfCtr));
tempC = (bctmode & (!ctrWordIsZero | endOfCtr));
tempD = (submode & (subvalid & islvar));
*mode += tempA | tempB | tempC;
*mode -= (tempD * 3);
}
void ctrthings5(cnf* c, u32* state, u32* ctr, const u32* max) {
u32 varcnt = c->cnts[0];
u32 clausecnt = c->cnts[1];
u32 wcnt = 1U + (varcnt >> 5U);
u32* mode = state;
u32* index = state + 1;
u32* addval = state + 2;
// printf("> %u %u %u %u %u\n", *mode, *index, *addval, *ctr, *max);
u8 addmode = (*mode == ADD);
u8 cmpmode = (*mode == CMP);
u8 bctmode = (*mode == BCT);
u8 submode = (*mode == CHK);
// 1 mask for if in subcheck mode, 1 for if not
u32 submask = -submode;
u32 nsubmask = ~submask;
u32 addmask = -addmode;
u32 naddmask = ~addmask;
/* subcheck */
// used to find correct ctr index
u32 varcor = varcnt - 1;
// Mask to prevent oob accesses for when (varcnt / 32) > clausecnt (or smth like that)
u32 currclause = *index & submask;
// clausedat records are 3 words long, multiply index by 3
u32 currclauseshifted = currclause * 3U;
// Retrieve beginning index of clause, add current var index
u32 currvarind = c->clausedat[currclauseshifted] + (*addval & submask);
// Retrieve current var & parity
u32 currvar = c->variables[currvarind];
u8 corrpar = c->parities[currvarind];
// Calculate index in ctr
u32 currvarcorr = (varcor - currvar);
u32 currvarword = currvarcorr >> 5U;
u32 currvarbit = currvarcorr & 0b11111U;
/* Can we make there be 1 ctr read per iteration? */
// If in sub mode, retrieve current var's bit, else return ctr[*index]
u32 ctrind = (currvarword & submask) | (*index & nsubmask);
u32 ctrval = ctr[ctrind];
u32 maxval = max[*index & nsubmask];
// Extract parity bit from ctr
u32 currpar = (ctrval >> currvarbit) & 1U;
// Check if assignment parity matches clause parity
u8 subvalid = currpar == corrpar;
// Don't mask addval because it's not being used for a lookup
u8 islvar = c->clausedat[currclauseshifted + 1U] == (*addval + 1);
u32 jval = c->clausedat[currclauseshifted + 2U];
u32 jword = jval >> 5U;
u32 jbit = 1U << (jval & 0b11111U);
u8 islclause = (clausecnt - 1) <= *index;
u8 endOfCtr = (ctrind == (wcnt - 1));
u8 begOfCtr = (ctrind == 0);
/* add */
u32 nctr = ctrval + *addval;
u8 addoverflow = (nctr < ctrval);
/* cmp */
u8 cmpresult = -(ctrval < maxval);
cmpresult += (ctrval > maxval);
u8 cmpnores = cmpresult == 0;
u8 cmpisless = cmpresult == 255U;
/* bitcnt */
u32 bitcntval = (ctrval & -ctrval);
u32 bitcnt = ((bitcntval & 0x0000FFFFU) != 0) << 4U;
bitcnt |= ((bitcntval & 0x00FF00FF) != 0) << 3U;
bitcnt |= ((bitcntval & 0x0F0F0F0F) != 0) << 2U;
bitcnt |= ((bitcntval & 0x33333333) != 0) << 1U;
bitcnt |= (bitcntval & 0x55555555) != 0;
bitcnt += bitcntval != 0;
bitcntval = 32 - bitcnt;
u8 ctrWordIsZero = ctrval == 0;
// Set current ctr word
// If in add mode, update ctr val, otherwise leave unchanged
ctr[ctrind] = (nctr & addmask) | (ctr[ctrind] & naddmask);
// Set current index
u32 tempA = -(submode & (subvalid & islvar));
u32 tempB = -(submode & (subvalid & !islvar));
u32 tempC = -((submode & !subvalid) | (addmode & (addoverflow & !endOfCtr)) | (bctmode & (ctrWordIsZero & !endOfCtr)));
u32 tempD = -(cmpmode & (cmpnores & !begOfCtr));
u32 tempE = -(addmode & (!addoverflow | endOfCtr));
u32 tempF = -(bctmode & (!ctrWordIsZero | endOfCtr));
tempA &= jword;
tempB &= *index;
tempC &= (*index + 1);
tempD &= (*index - 1);
tempE &= (wcnt - 1);
u32 indexval = (*addval + bitcntval);
u32 indexcmp = -(indexval < varcnt);
indexval = (indexval & indexcmp) | ((varcnt - 1) & (~indexcmp));
tempF &= c->index[((indexval) * bctmode)];
*index = tempA | tempB | tempC | tempD | tempE | tempF;
tempA = -(submode & (subvalid & islvar));
tempB = -(submode & (subvalid & !islvar));
tempC = -(addmode & (addoverflow & !endOfCtr));
tempD = -(bctmode & (ctrWordIsZero & !endOfCtr));
tempA &= (jbit);
tempB &= (*addval + 1);
tempC &= (addoverflow);
tempD &= (*addval + 32);
*addval = tempA | tempB | tempC | tempD;
/* completion detection */
u8 issat = (submode & ((!subvalid) & islclause));
u8 isusat = (cmpmode & ((!cmpnores) | begOfCtr)) & (!cmpisless);
u32 isdone = -(issat | isusat);
/* exit */
*mode |= isdone;
*index ^= (*index & isdone);
*index += issat;
tempA = (addmode & (!addoverflow | endOfCtr));
tempB = (cmpmode & (!cmpnores | begOfCtr));
tempC = (bctmode & (!ctrWordIsZero | endOfCtr));
tempD = (submode & (subvalid & islvar));
*mode += tempA | tempB | tempC;
*mode -= (tempD * 3);
}
/** NOTES:
* First thing's first is indexing, let's get started with that.
*
*
*
*
*
*/
void ctrthings2(cnf* c, u32* state, u32* ctr, const u32* max, FILE* iofile) {
// Pre-actual work setup stuff
u32 wcnt = 1U + (c->cnts[0] >> 5U); // number of words we need (bitcnt + 1 ceiling divided by 32)
u32* mode = state; // Make pointers to array values for convenience so I don't need to refer to everything via array indices
u32* index = state + 1; // tbh the names aren't much better
u32* addval = state + 2;
u32 varcnt = c->cnts[0] - 1; // (varcnt - 1) - v gives us the bit index of variable v in the ctr
u32 chkmsk = 0xFFFFFFFFU * (*mode == (2)); // Create a mask that zeros anything if we aren't in subsumption-check mode, to prevent out of bounds accesses
u32 chkcls = *index & chkmsk; // Use said mask to zero the index if we're not in check mode
u32 chkind = c->clausedat[3 * chkcls] + (*addval & chkmsk); // potentially zeroed index to lookup clause start, add potentially zeroed variable index in that clause
u32 var = c->variables[chkind]; // Retrieve variable at that index
u8 par = c->parities[chkind]; // Retrieve the variable's CNF parity
u32 vword = (varcnt - var) >> 5U; // As mentioned in the varcnt line, compute word index of var in ctr
u32 vbit = (varcnt - var) & 0b11111U; // Compute bit index of var in above ctr word
u8 corpar = (ctr[vword] >> vbit) & 1U; // Extract correct parity
u8 isvalid = (par == corpar); // Check if CNF parity equals assignment parity from ctr
u8 islvar = ((*addval + 1) == c->clausedat[3 * chkcls + 1]); // Check if this is the last variable of the clause
u8 isbchk0 = (*mode == (2)); // If in check mode
u8 isbchk1 = isbchk0 & isvalid; // If in check mode & valid
u8 isbchk2 = isbchk1 & islvar; // If in check mode, valid, and on the last var
u32 j = c->clausedat[3 * chkcls + 2]; // retrieve j val for current clause
*mode -= 2 * isbchk2; // If isbchk2, move to add mode
#ifdef DEBUGSAT
if (isbchk2) fprintf(iofile,"j: %u\n", j);
#endif
*index = (j >> 5U) * isbchk2 + *index * (!isbchk2); // if isbchk2, set index for the jval to be added
*addval = (1U << (j & 0b11111U)) * isbchk2 + *addval * (!isbchk2); // if isbchk, set addval for the jval to be added;
*addval += ((isbchk1) & (!islvar)); // If in check mode, valid, and not on the last var, move to the next var
u8 isbchk3 = (isbchk0 & (!isvalid)); // if in check mode and not valid, move to the next clause
*addval *= (!isbchk3); // if moving to next clause, go to 0th var of that clause
*index += (isbchk3); // actually moving to the next clause
u8 issat = (*index == c->cnts[1]) * (isbchk3); // if we just passed the last clause, none were valid, so our assignment is SAT
u32 cmpaddind = *index * (*mode != (2)); // Zero index into ctr if in check mode, to prevent out of bounds accesses
u32 nval = ctr[cmpaddind] + *addval; // Find the result of the current step if it was addition
*addval = (nval < ctr[cmpaddind]) * (*mode == (0)) + (*addval) * (*mode == (2)); // If in add mode, set addval to carry. If in cmp mode, set to 0. If in check mode, leave alone.
ctr[cmpaddind] = nval * ((*mode == (0)) & !issat) + ctr[cmpaddind] * ((*mode != (0)) | issat); // If in add mode, set new ctr val, otherwise leave unchanged
*addval -= (ctr[cmpaddind] < max[cmpaddind]) * (*mode == (1)); // If in comparison mode, decrement addval if less than
*addval += (ctr[cmpaddind] > max[cmpaddind]) * (*mode == (1)); // If in comparison mode, increment addval if greater than
u8 addcond = (*addval == 0) | (cmpaddind == (wcnt - 1)); // Exit condition for the ADD state: If addval is zero (no carry) or we're at the last word
u8 cmpcond = (*addval != 0) | (cmpaddind == 0); // Exit condition for the CMP state: if addval is nonzero (lt or gt) or we're at the least significant word
u8 exittime = (*mode == (1)) & cmpcond & (*addval != -1);
exittime |= issat;
if (exittime) { // If in cmpmode and the comparison result is not less than, unsat
printf("%u\n", issat);
*mode = 3;
return;
}
u8 cmpdone = cmpcond & (*mode == (1)); // if comparison completion conditions are satisfied and in CMP mode
u32 addindex = (cmpaddind + 1) * !addcond + (cmpaddind) * addcond; // if add completion is satisfied, set index to most significant word, else increment by 1
*index = addindex * (*mode == (0)) + (*index - (*mode == (1))) * (*mode != (0)); // If in add mode, use addindex; if in cmp mode, decrement index by 1
*index *= !cmpdone;
*addval *= !(((addcond) & (*mode == (0))) | cmpdone); // If add is complete, or cmp is complete, zero. Else leave unchanged.
*mode += addcond * (*mode == (0)) + cmpdone; // If in add mode and add completion is reached, increment mode. If in cmp mode and cmp completion reached, increment mode.
}
i32 cpusolve(cnf* c) {
u32 state[3];
u32 wcnt = 1 + (c->cnts[0] >> 5U);
u32* ctr = calloc((wcnt), sizeof(u32));
if (ctr == NULL) {
printf("Failed to allocate solution buffer\n");
exit(1);
}
u32* max = calloc((wcnt), sizeof(u32));
if (max == NULL) {
printf("Failed to allocate solution buffer\n");
exit(1);
}
stateaddpow(wcnt, max, c->cnts[0]);
state[0] = 2;
state[1] = state[2] = 0;
while (state[0] < 4) {
ctrthings5(c, state, ctr, max);
}
free(ctr);
free(max);
return state[1];
}
i32 compareFiles(FILE *file1, FILE *file2){
char ch1 = getc(file1);
char ch2 = getc(file2);
int error = 0, pos = 0, line = 1;
while (ch1 != EOF && ch2 != EOF){
pos++;
if (ch1 == '\n' && ch2 == '\n'){
line++;
pos = 0;
}
if (ch1 != ch2){
error++;
printf("Line Number : %d \tError"
" Position : %d \n", line, pos);
}
ch1 = getc(file1);
ch2 = getc(file2);
}
if (ch1 != EOF || ch2 != EOF) error += 1;
printf("Total Errors : %d\t\n", error);
return error;
}
void debugsolve(char* path) {
FILE* reffile = fopen("temp.txt", "w");
FILE* cmpfile = fopen("temp2.txt", "w");
runcsfloc(path, reffile);
cnf* c = readDIMACS(path);
sortlastnum(c);
u32* state = malloc(sizeof(u32) * 3);
u32 wcnt = 1 + (c->cnts[0] >> 5U);
u32* ctr = calloc((wcnt), sizeof(u32));
if (ctr == NULL) {
printf("Failed to allocate solution buffer\n");
exit(1);
}
u32* max = calloc((wcnt), sizeof(u32));
if (max == NULL) {
printf("Failed to allocate solution buffer\n");
exit(1);
}
stateaddpow(wcnt, max, c->cnts[0]);
state[0] = 2;
state[1] = state[2] = 0;
while (state[0] < 4) {
ctrthings4(c, state, ctr, max, cmpfile);
}
free(ctr);
free(max);
freecnf(c);
fclose(reffile);
fclose(cmpfile);
reffile = fopen("temp.txt", "r");
cmpfile = fopen("temp2.txt", "r");
i32 res = compareFiles(reffile, cmpfile);
if (res != 0) {
printf("%s\n", path);
exit(1);
}
fclose(reffile);
fclose(cmpfile);
free(state);
}

View File

@@ -1,31 +1,9 @@
#pragma once
#include "types.h"
typedef struct {
u32 varcnt;
u32 clausecnt;
u32* clauseinds;
u32* clausevals;
u32* watchlist;
} cpusolver;
void cpusolve() {
cpusolver s;
#include "ncnf.h"
}
/*
* Read in DIMACs
* A clause is a list of variables and their assignments
*
* Create watchlists:
* 2 entries for each list - flag bit for if its a binary clause
*
* 2-watch:
* If one of them is marked false, remove and replace
* if there are no other literals in the clause that are true, unitprop the last literal
* if there are no literals that evaluate to true and the other watch is false, UNSAT
* if a literal is true, do nothing
* if 2 literals are unassigned, clause is unsat
*/
i32 cpusolve(cnf* c);
void debugsolve(char* path);

630
csflocref.c Normal file
View File

@@ -0,0 +1,630 @@
#include "csflocref.h"
/*
============================================================================
Name : csfloc.c
Author : Gábor Kusper
Version : 1.0
Copyright : Gábor Kusper
Description : CSFLOC in C, Ansi-style
============================================================================
*/
#include <stdlib.h>
#include <string.h>
#include <time.h>
char* fileName;
int doSorting = 1;
int doIndexing = 1; // can be true only if doSorting is also true
int addLongTailClauses = 0; // can be greater than 0 only if doIndexing is true
int howManyClausesAddMaximumPerClauses = 0;
unsigned int MAXINTINDEX;
unsigned int MAXBITINDEX;
unsigned int MAXVALUE;
unsigned int* counter;
unsigned int* clauses;
int* numxsAtEnd;
int* indexClausesByNumxsAtEnd;
int numOfClauses;
int numOfVariables;
int lentghOfBitArray;
void printBitArray(unsigned int* toPrint)
{
int i;
for(i = numOfVariables-1; i>=0; i--)
{
int intIndex = i / (sizeof(int)*8);
int bitIndex = i % (sizeof(int)*8);
int mask = 1 << bitIndex;
if((toPrint[intIndex] & mask) > 0) printf("%d ", numOfVariables-i);
else printf("-%d ", numOfVariables-i);
}
printf("0 \n");
}
void printClause(int clauseIndex)
{
printf("clauseIndex:%d\n",clauseIndex);
printf("numxsAtEnd[clauseIndex]:%d\n",numxsAtEnd[clauseIndex]);
int index = clauseIndex * lentghOfBitArray * 2;
int i;
for(i = numOfVariables-1; i>=0; i--)
{
int intIndex = i / (sizeof(int)*8);
int bitIndex = i % (sizeof(int)*8);
int mask = 1 << bitIndex;
if((clauses[index + intIndex*2] & mask) > 0) printf("%d ", numOfVariables-i);
else printf("-%d ", numOfVariables-i);
}
printf("0 \n");
for(i = numOfVariables-1; i>=0; i--)
{
int intIndex = i / (sizeof(int)*8);
int bitIndex = i % (sizeof(int)*8);
int mask = 1 << bitIndex;
if((clauses[index + intIndex*2 + 1] & mask) > 0) printf("%d ", numOfVariables-i);
else printf("-%d ", numOfVariables-i);
}
printf("0 \n");
}
void printClauses()
{
int clauseIndex;
for(clauseIndex=0; clauseIndex < numOfClauses; clauseIndex++)
{
printClause(clauseIndex);
}
}
// coin: http://graphics.stanford.edu/~seander/bithacks.html#ZerosOnRightBinSearch
int countZeroBitsOnTheRight(unsigned int v)
{
int c;
// NOTE: if 0 == v, then c = 31.
if (v & 0x1)
{
// special case for odd v (assumed to happen half of the time)
c = 0;
}
else
{
c = 1;
if ((v & 0xffff) == 0)
{
v >>= 16;
c += 16;
}
if ((v & 0xff) == 0)
{
v >>= 8;
c += 8;
}
if ((v & 0xf) == 0)
{
v >>= 4;
c += 4;
}
if ((v & 0x3) == 0)
{
v >>= 2;
c += 2;
}
c -= v & 0x1;
}
return c;
}
int countZeroBitsOnTheRightInBitArray(int clauseIndex)
{
int countZeroBits = 0;
int index = clauseIndex * lentghOfBitArray * 2;
int intIndex;
for(intIndex=0; intIndex<lentghOfBitArray; intIndex++)
{
if(clauses[index + intIndex*2]!=0) break;
countZeroBits += sizeof(unsigned int)*8;
}
if (intIndex == lentghOfBitArray) return numOfVariables-1;
countZeroBits += countZeroBitsOnTheRight(clauses[index + intIndex*2]);
return countZeroBits;
}
int countZeroBitsOnTheRightInBitArrayOldOne(unsigned int* bits)
{
int countZeroBits = 0;
int intIndex ;
for(intIndex=0; intIndex<lentghOfBitArray; intIndex++)
{
if(bits[intIndex]!=0) break;
countZeroBits += sizeof(unsigned int)*8;
}
if (intIndex == lentghOfBitArray) return numOfVariables-1;
countZeroBits += countZeroBitsOnTheRight(bits[intIndex]);
return countZeroBits;
}
void resolution(int clauseIndexC, int clauseIndexA, int clauseIndexB, int intIndex, int bitIndex)
{
//printf("Start of Resolution\n");
//printf("intIndex: %d\n", intIndex);
//printf("bitIndex: %d\n", bitIndex);
//printClause(clauseIndexA);
//printClause(clauseIndexB);
numxsAtEnd[clauseIndexC] = -1;
int indexA = clauseIndexA * lentghOfBitArray * 2;
int indexB = clauseIndexB * lentghOfBitArray * 2;
if ((clauses[indexA + intIndex*2 +1] & 1 << bitIndex) == (clauses[indexB + intIndex*2 +1] & 1 << bitIndex)) return;
int i;
for(i=0; i<lentghOfBitArray; i++)
{
unsigned int tesztA, tesztB, tesztMask;
tesztMask = clauses[indexA + i*2] & clauses[indexB + i*2];
tesztA = clauses[indexA + i*2 + 1] & tesztMask;
tesztB = clauses[indexB + i*2 + 1] & tesztMask;
if (i==intIndex) { tesztA |= 1<<bitIndex; tesztB |= 1<<bitIndex; }
if (tesztA != tesztB) return;
}
int indexC = clauseIndexC * lentghOfBitArray * 2;
for(i=0; i<lentghOfBitArray; i++)
{
clauses[indexC + i*2] = clauses[indexA + i*2] | clauses[indexB + i*2];
clauses[indexC + i*2 + 1] = clauses[indexA + i*2 + 1] | clauses[indexB + i*2 + 1];
}
clauses[indexC + intIndex*2] &= ~(1<<bitIndex);
clauses[indexC + intIndex*2 + 1] &= ~(1<<bitIndex);
numxsAtEnd[clauseIndexC] = countZeroBitsOnTheRightInBitArray(clauseIndexC);
//printf("End of Resolution\n");
//printClause(clauseIndexA);
//printClause(clauseIndexB);
//printClause(clauseIndexC);
}
void readDIMACScsfloc(char* fileName) {
char c[1000];
FILE *fptr;
if ((fptr=fopen(fileName,"r"))==NULL)
{
printf("Error! opening file");
exit(1);
}
int clauseIndex = 0;
while (fgets(c,1000, fptr)!=NULL) {
if (c[0]=='c' || c[0]=='%' || c[0]=='0' || c[0]=='\n') continue;
if (c[0]=='p'){
char* token = strtok(c, " ");
token = strtok(NULL, " ");
token = strtok(NULL, " ");
numOfVariables = atoi(token);
MAXINTINDEX = numOfVariables/(sizeof(int)*8);
MAXBITINDEX = numOfVariables%(sizeof(int)*8);
MAXVALUE = 1 << MAXBITINDEX;
lentghOfBitArray = numOfVariables/(sizeof(int)*8)+1;
token = strtok(NULL, " ");
numOfClauses = atoi(token);
numxsAtEnd = malloc(sizeof(int) * (numOfClauses+addLongTailClauses));
clauses = malloc(sizeof(int) * (numOfClauses+addLongTailClauses) * lentghOfBitArray * 2);
indexClausesByNumxsAtEnd = malloc(sizeof(int) * numOfVariables);
int i;
for(i=0; i<numOfVariables; i++)
{
indexClausesByNumxsAtEnd[i] = 0;
}
continue;
}
int index = clauseIndex * lentghOfBitArray * 2;
int i;
for(i=0; i<lentghOfBitArray; i++)
{
clauses[index + i*2] = 0;
clauses[index + i*2 +1] = 0;
}
int maxLiteral = 0;
char* token = strtok(c, " ");
while (token) {
int literal = atoi(token);
token = strtok(NULL, " ");
if (literal == 0) continue;
int literalIndex = numOfVariables - abs(literal);
int intIndex = literalIndex / (sizeof(int)*8);
int bitIndex = literalIndex % (sizeof(int)*8);
unsigned int setMask = 1 << bitIndex;
clauses[index + intIndex * 2] |= setMask;
if (literal > 0) clauses[index + intIndex * 2 +1] |= setMask;
if (abs(literal) > maxLiteral) maxLiteral = abs(literal);
}
numxsAtEnd[clauseIndex] = numOfVariables - maxLiteral;
clauseIndex++;
if (clauseIndex == numOfClauses) break;
}
fclose(fptr);
/*
for (uint i = 0; i < numOfClauses; ++i) {
printf("%u ", numxsAtEnd[i]);
}
printf("\n");
printf("numxsAtEnd\n");
*/
}
void sortClausesByNumxsAtEnd()
{
//printf("Before sorting:\n");
//printClauses();
int i;
for(i=1; i<numOfClauses; i++)
{
int clauseIndex;
for(clauseIndex=0; clauseIndex<numOfClauses-i; clauseIndex++)
{
if (numxsAtEnd[clauseIndex]<numxsAtEnd[clauseIndex+1])
{
unsigned int temp = numxsAtEnd[clauseIndex];
numxsAtEnd[clauseIndex] = numxsAtEnd[clauseIndex+1];
numxsAtEnd[clauseIndex+1] = temp;
int indexA = clauseIndex * lentghOfBitArray * 2;
int indexB = (clauseIndex+1) * lentghOfBitArray * 2;
int relIndex;
for(relIndex=0; relIndex<lentghOfBitArray*2; relIndex++)
{
temp = clauses[indexA+relIndex];
clauses[indexA+relIndex] = clauses[indexB+relIndex];
clauses[indexB+relIndex] = temp;
}
}
}
}
//printf("After sorting:\n");
//printClauses();
}
void createIndexClausesByNumxsAtEnd()
{
int i;
int lastValue = 0;
for(i=numOfClauses-1; i>=0; i--)
{
if (numxsAtEnd[i] > lastValue)
{
while(lastValue != numxsAtEnd[i])
{
indexClausesByNumxsAtEnd[lastValue] = i+1;
lastValue++;
}
}
}
// it might be, that we over index if there is no clause with numxAtEnd == 0
// this part corrects this if needed
int correctioningIndex = 0;
while (indexClausesByNumxsAtEnd[correctioningIndex] == numOfClauses)
{
correctioningIndex++;
}
if (correctioningIndex!=0)
{
int goodValue = indexClausesByNumxsAtEnd[correctioningIndex];
do
{
correctioningIndex--;
indexClausesByNumxsAtEnd[correctioningIndex] = goodValue;
}
while(correctioningIndex!=0);
}
}
void addClausesByResolution()
{
int numberOfNewClauses = 0;
int numOfZerosAtEnd = 0;
while (numberOfNewClauses<addLongTailClauses) // While adding clauses
{
int clauseIndexA= numOfClauses-1;
if(numOfZerosAtEnd!=0)
{
clauseIndexA = indexClausesByNumxsAtEnd[numOfZerosAtEnd-1]-1;
}
for( ; clauseIndexA>=indexClausesByNumxsAtEnd[numOfZerosAtEnd]; clauseIndexA--)
{
int clauseIndexB;
int newClausesPerClause = 0;
for(clauseIndexB=clauseIndexA-1; clauseIndexB>=indexClausesByNumxsAtEnd[numOfZerosAtEnd]; clauseIndexB--)
{
resolution(numOfClauses+numberOfNewClauses, clauseIndexA, clauseIndexB,
numOfZerosAtEnd/(sizeof(unsigned int)*8),
numOfZerosAtEnd%(sizeof(unsigned int)*8));
if (numxsAtEnd[numOfClauses+numberOfNewClauses] == -1) continue;
//printf("numOfZerosAtEnd: %d\n", numOfZerosAtEnd);
//printf("numberOfNewClauses: %d\n", numberOfNewClauses);
//printf("numxsAtEnd[numOfClauses+numberOfNewClauses]: %d\n", numxsAtEnd[numOfClauses+numberOfNewClauses]);
numberOfNewClauses++;
newClausesPerClause++;
if (newClausesPerClause==howManyClausesAddMaximumPerClauses) break;
if (numberOfNewClauses==addLongTailClauses) break;
}
if (numberOfNewClauses==addLongTailClauses) break;
// +1 m?lys?g eleje
int numOfZerosAtEnd2 = numxsAtEnd[numOfClauses+numberOfNewClauses-1];
int clauseIndexC = numOfClauses+numberOfNewClauses-1;
int clauseIndexD;
for(clauseIndexD=indexClausesByNumxsAtEnd[numOfZerosAtEnd2-1]-1;
clauseIndexD>=indexClausesByNumxsAtEnd[numOfZerosAtEnd2]; clauseIndexD--)
{
//printf("before resolution in +1 depth\n");
//printf("numxsAtEnd[clauseIndexC]: %d\n", numxsAtEnd[clauseIndexC]);
//printf("numxsAtEnd[clauseIndexD]: %d\n", numxsAtEnd[clauseIndexD]);
resolution(numOfClauses+numberOfNewClauses, clauseIndexC, clauseIndexD,
numOfZerosAtEnd2/(sizeof(unsigned int)*8),
numOfZerosAtEnd2%(sizeof(unsigned int)*8));
if (numxsAtEnd[numOfClauses+numberOfNewClauses] == -1) continue;
//printf("after resolution in +1 depth\n");
//printf("numOfZerosAtEnd2: %d\n", numOfZerosAtEnd2);
//printf("numberOfNewClauses: %d\n", numberOfNewClauses);
//printf("numxsAtEnd[numOfClauses+numberOfNewClauses]: %d\n", numxsAtEnd[numOfClauses+numberOfNewClauses]);
numberOfNewClauses++;
newClausesPerClause++;
if (newClausesPerClause==howManyClausesAddMaximumPerClauses) break;
if (numberOfNewClauses==addLongTailClauses) break;
}
if (newClausesPerClause==howManyClausesAddMaximumPerClauses) break;
if (numberOfNewClauses==addLongTailClauses) break;
if (numberOfNewClauses==addLongTailClauses) break;
//+1 m?lys?g v?ge
}
numOfZerosAtEnd++;
if (numOfZerosAtEnd == numOfVariables) break;
}
int i;
for(i=numOfClauses; i<numOfClauses+numberOfNewClauses; i++)
{
int clauseIndex;
for(clauseIndex=i-1; clauseIndex>=0; clauseIndex--)
{
if (numxsAtEnd[clauseIndex]<numxsAtEnd[clauseIndex+1])
{
unsigned int temp = numxsAtEnd[clauseIndex];
numxsAtEnd[clauseIndex] = numxsAtEnd[clauseIndex+1];
numxsAtEnd[clauseIndex+1] = temp;
int indexA = clauseIndex * lentghOfBitArray * 2;
int indexB = (clauseIndex+1) * lentghOfBitArray * 2;
int relIndex;
for(relIndex=0; relIndex<lentghOfBitArray*2; relIndex++)
{
temp = clauses[indexA+relIndex];
clauses[indexA+relIndex] = clauses[indexB+relIndex];
clauses[indexB+relIndex] = temp;
}
}
}
}
numOfClauses+=numberOfNewClauses;
createIndexClausesByNumxsAtEnd();
}
void CCCSolver(FILE* iofile)
{
counter = malloc(sizeof(int)*lentghOfBitArray);
int counterArrayIndex;
for(counterArrayIndex=0; counterArrayIndex<lentghOfBitArray; counterArrayIndex++)
{
counter[counterArrayIndex] = 0;
}
int changed = 1;
while(changed)
{
changed = 0;
int maxNumx = -1;
int clauseIndex = 0;
if (doIndexing)
{
int numOfZerosAtEnd = countZeroBitsOnTheRightInBitArrayOldOne(counter);
clauseIndex=indexClausesByNumxsAtEnd[numOfZerosAtEnd];
fprintf(iofile,"z: %d %d\n", numOfZerosAtEnd, clauseIndex);
}
while(clauseIndex<numOfClauses)
{
int subsumed = 1;
int intIndex = 0;
int index = clauseIndex * lentghOfBitArray * 2;
for(intIndex=0; intIndex<lentghOfBitArray; intIndex++)
{
if ((counter[intIndex] & clauses[index]) != (clauses[index+1] & clauses[index]))
{
subsumed = 0;
break;
}
index+=2;
}
//printf("Subsumed: %d\n", subsumed);
if (subsumed)
{
if (numxsAtEnd[clauseIndex]>maxNumx) maxNumx = numxsAtEnd[clauseIndex];
// printf("s %u %u\n", clauseIndex, numxsAtEnd[clauseIndex]);
if (doSorting) {
break;
}
}
clauseIndex++;
}
if(maxNumx >= 0)
{
fprintf(iofile, "j: %d\n",maxNumx);
int intIndex = maxNumx / (sizeof(int)*8);
int bitIndex = maxNumx % (sizeof(int)*8);
unsigned int oldCounter = counter[intIndex];
//printf("Counter old = %d : %d\n", intIndex, oldCounter);
counter[intIndex] += 1 << bitIndex;
//printf("Counter new = %d : %u\n\n", intIndex, counter.array[intIndex]);
// add carry
while(oldCounter > counter[intIndex]) {
//printf("add carry\n");
//printf("num of clauses:%d\n",numOfClauses);
//printf("i = %d\n", i);
//printf("Counter old = %d : %u\n", intIndex, oldCounter);
//printf("bitIndex = %d ; 1 << bitIndex = %d\n", bitIndex, 1 << bitIndex);
//printf("Counter new = %d : %u\n", intIndex, counter.array[intIndex]);
intIndex++;
oldCounter = counter[intIndex];
//printf("Counter old = %d : %u\n", intIndex, oldCounter);
counter[intIndex]++; // carry added here
//printf("Counter new = %d : %u\n\n", intIndex, counter[intIndex]);
}
if (counter[lentghOfBitArray-1] >= MAXVALUE) return;
changed = 1;
}
}
int i;
for (i=0; i<lentghOfBitArray; i++) {
counter[i] = ~counter[i];
}
counter[lentghOfBitArray-1] &= (MAXVALUE-1);
}
void printHelp() {
printf("Count Clear Clause SAT Solver v2.0\n");
printf("\tA SAT solver based on the CCC algorithm.\n");
printf("\tDeveloped by Gabor Kusper, 2015.\n");
printf("USAGE:\n");
printf("\tcsfloc [-help] -input=<inputfile.cnf>\n");
printf("\t\t<inputfile.cnf> is a file name, it is the SAT problem to be solved, "
"it should be in DIMACS format.\n");
printf("\t\tOUTPUT is written to standard output.\n");
printf("OPTIONS:\n");
printf("\t-help\tDisplays this help message.\n");
printf("\t-dosort\tTurns on clause sorting.\n");
printf("\t-donotsort\tTurns off clause sorting.\n");
printf("\t-doindexing\tTurns on clause indexing, and also forces clause sorting to be switched on.\n");
printf("\t-donotindexing\tTurns off clause indexing.\n");
printf("\t-addLongTailClauses=N\tAllows to add long tail clauses, ");
printf("max N clauses are allowed, if N is 0 then it switches off long tail clause adding. ");
printf("If N is greater than 0 then it forces clause ordering and indexing.\n");
printf("\t-addLongTailClausesPerClause=M\tAllows to add maximum M long tail clauses per each input clause, ");
printf("if M is 0 then it switches off long tail clause adding, i.e., it sets N to be 0. ");
printf("It has any effect if and only if long tail clause adding is switched on, ");
printf("i.e., N and M are not 0.\n");
printf("THE DEFAULT SETTINGS ARE:\n");
printf("Clause indexing is turned on, clause sorting is turned on, ");
printf("1000 long tail clauses are allowed, 100 per input clauses, i.e., N = 1000, and M = 100.\n");
printf("EXAMPLE 1:\n");
printf("\tcsfloc -input=hole6.cnf\n");
printf("\tReads and solves the SAT problem represented by hole6.cnf. The solution is written to the standard output.\n");
printf("EXAMPLE 2:\n");
printf("\tcsfloc -input=uf50-01.cnf -dosort -doindex -addLongTailClauses=1000 -addLongTailClausesPerClause=100\n");
printf("\tReads and solves the SAT problem represented by uf50-01.cnf. ");
printf("Clause sorting is switched on. ");
printf("Clause indexing is switched on. ");
printf("Adding long tail clauses is switch on. Maximum 1000 new clauses are created, maximum 100 per input clauses. ");
printf("The solution is written to the standard output.\n");
}
// read the "value" of a command line argument, where the format of the arguments is <name>=<value>
char* getCommandLineArgValue(char* arg) {
char* s = strchr(arg, '=');
if (!s) return NULL;
// close the "name" of the argument, as a string
*s = '\0';
// return the "value" of the argument
return s + 1;
}
// process all the command line argument
int processCommandLineArgs(int argc, char** argv) {
int i=0;
int numberOfSetMandatoryOptions=1;
return numberOfSetMandatoryOptions == 1;
}
void runcsfloc(char* path, FILE* iofile) {
long startTime = clock();
//printf("phase 0\n");
readDIMACScsfloc(path);
//printf("phase 1\n");
// printClauses();
if (doSorting) {
sortClausesByNumxsAtEnd();
}
//printf("phase 2\n");
// printClauses();
if (doIndexing) createIndexClausesByNumxsAtEnd();
//printf("phase 3\n");
if (addLongTailClauses>0) {
addClausesByResolution();
printf("Please don't do this\n");
}
CCCSolver(iofile);
long endTime = clock();
/*
if (counter[lentghOfBitArray-1] >= MAXVALUE) printf("0\n");
else
{
printf("1\n");
}
*/
free(numxsAtEnd);
free(counter);
free(clauses);
free(indexClausesByNumxsAtEnd);
}

5
csflocref.h Normal file
View File

@@ -0,0 +1,5 @@
#pragma once
#include "types.h"
#include <stdio.h>
void runcsfloc(char* path, FILE* iofile);

1
githubtoken Normal file
View File

@@ -0,0 +1 @@
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

View File

@@ -1,16 +1,349 @@
#include "gpusolver.h"
#include <CL/cl.h>
#include "time.h"
#include "gmp.h"
#define LOCAL_SIZE (128)
#define GLOBAL_SIZE (1024)
#define LOCAL_SIZE (64)
#define GLOBAL_SIZE (2048)
#define CHECKASGN (true)
#define DEBUG
gpusolver* initSolver() {
gpusolver* o = calloc(1, sizeof(gpusolver));
if (o == NULL) return NULL;
o->platformid = NULL;
o->numplatforms = 0;
o->deviceid = NULL;
o->numdevices = 0;
FILE* fp = fopen("../psat.cl", "r");
if (!fp) {
fprintf(stderr, "Failed to load kernel\n");
// TODO: Cleanup
return NULL;
}
o->source_str = malloc(0x100000);
o->source_size = fread(o->source_str, 1, 0x100000, fp);
o->source_str = realloc(o->source_str, o->source_size + 1);
if (o->source_str == NULL) {
printf("Failed to reallocate source\n");
return NULL;
}
fclose(fp);
cl_int res = clGetPlatformIDs(1, &(o->platformid), &(o->numplatforms));
if (res != CL_SUCCESS) {
printf("Failed to retrieve OpenCL platform IDs\n");
// TODO: Cleanup
return NULL;
}
res = clGetDeviceIDs(o->platformid, CL_DEVICE_TYPE_GPU, 1, &(o->deviceid), &(o->numdevices));
if (res != CL_SUCCESS) {
printf("Failed to retrieve OpenCL device IDs\n");
// TODO: Cleanup
return NULL;
}
o->ctx = clCreateContext(NULL, 1, &(o->deviceid), NULL, NULL, &res);
if (res != CL_SUCCESS) {
printf("Failed to create OpenCL context\n");
// TODO: Cleanup
return NULL;
}
cl_queue_properties properties[] = { CL_QUEUE_PROPERTIES, CL_QUEUE_PROFILING_ENABLE, 0 };
o->commqueue = clCreateCommandQueueWithProperties(o->ctx, o->deviceid, properties, &res);
if (res != CL_SUCCESS) {
printf("Failed to create OpenCL command queue\n");
// TODO: Cleanup
return NULL;
}
res = clGetDeviceInfo(o->deviceid, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(cl_ulong), &(o->gpuMemoryMax), NULL);
if (res != CL_SUCCESS) {
printf("Failed to query total GPU memory\n");
// TODO: CLeanup
return NULL;
}
res = clGetDeviceInfo(o->deviceid, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(cl_ulong), &(o->gpuLocalMax), NULL);
if (res != CL_SUCCESS) {
printf("Failed to query total GPU memory\n");
// TODO: CLeanup
return NULL;
}
res = clGetDeviceInfo(o->deviceid, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(cl_ulong), &(o->gpuAllocMax), NULL);
if (res != CL_SUCCESS) {
printf("Failed to query total GPU memory\n");
// TODO: CLeanup
return NULL;
}
res = clGetDeviceInfo(o->deviceid, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(cl_ulong), &(o->gpuCUs), NULL);
if (res != CL_SUCCESS) {
printf("Failed to query total GPU memory\n");
// TODO: Cleanup
return NULL;
}
o->program = clCreateProgramWithSource(o->ctx, 1, (const char**) &(o->source_str), (const size_t*) &(o->source_size), &res);
if (res != CL_SUCCESS) {
printf("Failed to create OpenCL program\n");
// TODO: Cleanup
exit(1);
}
res = clBuildProgram(o->program, 1, &(o->deviceid), NULL, NULL, NULL);
if (res != CL_SUCCESS) {
printf("Build failed\n");
// TODO: Cleanup
exit(1);
}
size_t loglen = 0;
res = clGetProgramBuildInfo(o->program, o->deviceid, CL_PROGRAM_BUILD_LOG, NULL, NULL, &loglen);
if (res != CL_SUCCESS) {
printf("Failed to retrieve build logs\n");
exit(1);
}
char* logbuf = malloc(sizeof(char) * loglen);
res = clGetProgramBuildInfo(o->program, o->deviceid, CL_PROGRAM_BUILD_LOG, sizeof(char) * loglen, logbuf, &loglen);
if (res != CL_SUCCESS) {
printf("Failed to retrieve build logs\n");
exit(1);
}
printf("%*.s\n", (int) loglen, logbuf);
free(logbuf);
o->kernel = clCreateKernel(o->program, "vectorSAT", &res);
if (res != CL_SUCCESS) {
printf("Failed to create kernel\n");
printf("%d\n", res);
// TODO: Cleanup
exit(1);
}
printf("Initialized solver:\n");
printf("\tCompute Units: %lu\n", o->gpuCUs);
printf("\tMax Global Memory: %lu\n", o->gpuMemoryMax);
printf("\tMax Local Memory: %lu\n", o->gpuLocalMax);
printf("\tMax Alloc Memory: %lu\n", o->gpuAllocMax);
return o;
}
i32 gpusolve2(gpusolver* gs, cnf* c) {
u32 wcnt = 1 + (c->cnts[0] >> 5U);
u32* solution = calloc((wcnt + 1), sizeof(u32));
if (solution == NULL) {
printf("Failed to allocate solution buffer\n");
exit(1);
}
gs->gpuCUs = 1024;
mpz_t gmpmax;
mpz_init(gmpmax);
mpz_ui_pow_ui(gmpmax, 2, c->cnts[0]);
mpz_div_ui(gmpmax, gmpmax, gs->gpuCUs);
mpz_export(solution + 1, NULL, -1, sizeof(u32), 0, 0, gmpmax);
// mpz_out_str(stdout, 10, gmpmax);
// printf("\n\n");
mpz_clear(gmpmax);
solution[0] = 0;
cl_int res = 2;
cl_mem gpuheader = clCreateBuffer(gs->ctx, CL_MEM_READ_ONLY, 2 * sizeof(cl_uint), NULL, &res);
if (res != CL_SUCCESS) {
printf("Failed to create CNF header buffer\n");
exit(1);
}
cl_mem gpulvars = clCreateBuffer(gs->ctx, CL_MEM_READ_ONLY, 3 * c->cnts[1] * sizeof(cl_uint), NULL, &res);
if (res != CL_SUCCESS) {
printf("Failed to create CNF lvar buffer\n");
exit(1);
}
cl_mem gpuvariables = clCreateBuffer(gs->ctx, CL_MEM_READ_ONLY, c->cnts[2] * sizeof(cl_uint), NULL, &res);
if (res != CL_SUCCESS) {
printf("Failed to create CNF variable buffer\n");
exit(1);
}
cl_mem gpuparities = clCreateBuffer(gs->ctx, CL_MEM_READ_ONLY, c->cnts[2] * sizeof(cl_uchar), NULL, &res);
if (res != CL_SUCCESS) {
printf("Failed to create CNF parity buffer\n");
exit(1);
}
cl_mem gpuoutput = clCreateBuffer(gs->ctx, CL_MEM_READ_WRITE, (wcnt + 1) * sizeof(cl_uint), NULL, &res);
if (res != CL_SUCCESS) {
printf("Failed to create output buffer\n");
exit(1);
}
cl_mem gpuscratchpad = clCreateBuffer(gs->ctx, CL_MEM_READ_WRITE, 2 * wcnt * gs->gpuCUs * sizeof(cl_uint), NULL, &res);
if (res != CL_SUCCESS) {
printf("Failed to create output buffer\n");
exit(1);
}
// Load buffers to GPU
res = clEnqueueWriteBuffer(gs->commqueue, gpuheader, CL_TRUE, 0, 2 * sizeof(cl_uint), c->cnts, 0, NULL, NULL);
if (res != CL_SUCCESS) {
printf("Failed to queue CNF header write\n");
exit(1);
}
res = clEnqueueWriteBuffer(gs->commqueue, gpulvars, CL_TRUE, 0, 3 * c->cnts[1] * sizeof(cl_uint), c->clausedat, 0, NULL, NULL);
if (res != CL_SUCCESS) {
printf("Failed to queue CNF lvar write\n");
exit(1);
}
res = clEnqueueWriteBuffer(gs->commqueue, gpuvariables, CL_TRUE, 0, c->cnts[2] * sizeof(cl_uint), c->variables, 0, NULL, NULL);
if (res != CL_SUCCESS) {
printf("Failed to queue CNF variable write\n");
exit(1);
}
res = clEnqueueWriteBuffer(gs->commqueue, gpuparities, CL_TRUE, 0, c->cnts[2] * sizeof(cl_uchar), c->parities, 0, NULL, NULL);
if (res != CL_SUCCESS) {
printf("Failed to queue CNF parity write\n");
exit(1);
}
res = clEnqueueWriteBuffer(gs->commqueue, gpuoutput, CL_TRUE, 0, (wcnt + 1) * sizeof(cl_uint), solution, 0, NULL, NULL);
if (res != CL_SUCCESS) {
printf("Failed to queue CNF parity write\n");
exit(1);
}
res = clSetKernelArg(gs->kernel, 0, sizeof(cl_mem), (void*) &gpuheader);
res = clSetKernelArg(gs->kernel, 1, sizeof(cl_mem), (void*) &gpulvars);
res = clSetKernelArg(gs->kernel, 2, sizeof(cl_mem), (void*) &gpuvariables);
res = clSetKernelArg(gs->kernel, 3, sizeof(cl_mem), (void*) &gpuparities);
res = clSetKernelArg(gs->kernel, 4, sizeof(cl_mem), (void*) &gpuoutput);
res = clSetKernelArg(gs->kernel, 5, sizeof(cl_mem), (void*) &gpuscratchpad);
size_t deploySize[2] = { gs->gpuCUs, 64 };
res = clEnqueueNDRangeKernel(gs->commqueue, gs->kernel, 1, NULL, &(gs->gpuCUs), &(gs->gpuCUs), 0, NULL, NULL);
if (res != CL_SUCCESS) {
printf("Failed to queue kernel for execution\n");
exit(res);
}
res = clEnqueueReadBuffer(gs->commqueue, gpuoutput, CL_TRUE, 0, (wcnt + 1) * sizeof(cl_uint), solution, 0, NULL, NULL);
if (res != CL_SUCCESS) {
printf("Failed to read kernel output\n");
exit(1);
}
// u64 endtime = utime();
if (solution[0] == 0) {
printf("UNSAT\n");
} else if (solution[0] == 1) {
printf("SAT: ");
for (u32 k = 0; k < c->cnts[0]; ++k) {
u32 vind = (c->cnts[0] - 1) - k;
u32 iind = vind >> 5U;
u32 bind = vind & 0b11111U;
u8 par = (solution[iind + 1] >> bind) & 1U;
printf("%u", par);
}
if (CHECKASGN) {
u8 checkres = 0;
for (u32 i = 0; i < c->cnts[1]; ++i) {
checkres = 0;
for (u32 j = 0; j < c->clausedat[3 * i + 1]; ++j) {
u32 v = c->variables[c->clausedat[3 * i] + j];
u32 vv = c->cnts[0] - 1;
u32 g = (vv - v) >> 5U;
u32 h = (vv - v) & 0b11111U;
u8 paract = (solution[g + 1] >> h) & 1U;
if (c->parities[c->clausedat[3 * i] + j] == paract) {
checkres = 1;
break;
}
}
if (!checkres) break;
}
if (checkres) {
printf(" \xE2\x9C\x93\n");
} else {
printf(" -\n");
}
}
} else {
printf("What the fuck???\n");
solution[0] = 3;
}
res = clReleaseMemObject(gpuheader);
res = clReleaseMemObject(gpulvars);
res = clReleaseMemObject(gpuvariables);
res = clReleaseMemObject(gpuparities);
res = clReleaseMemObject(gpuoutput);
res = clReleaseMemObject(gpuscratchpad);
i32 retval = (i32) solution[0];
free(solution);
return retval;
}
void freeSolver(gpusolver* gs) {
i32 res = 0;
res = clFlush(gs->commqueue);
if (res != CL_SUCCESS) {
printf("Failed to release solver\n");
return;
}
res = clFinish(gs->commqueue);
if (res != CL_SUCCESS) {
printf("Failed to release solver\n");
return;
}
res = clReleaseKernel(gs->kernel);
if (res != CL_SUCCESS) {
printf("Failed to release solver\n");
return;
}
res = clReleaseProgram(gs->program);
if (res != CL_SUCCESS) {
printf("Failed to release solver\n");
return;
}
res = clReleaseCommandQueue(gs->commqueue);
if (res != CL_SUCCESS) {
printf("Failed to release solver\n");
return;
}
res = clReleaseContext(gs->ctx);
if (res != CL_SUCCESS) {
printf("Failed to release solver\n");
return;
}
res = clReleaseDevice(gs->deviceid);
if (res != CL_SUCCESS) {
printf("Failed to release solver\n");
return;
}
free(gs->source_str);
free(gs);
}
i32 gpusolve(cnf* c) {
cl_platform_id platformid = NULL;
cl_device_id deviceid = NULL;
@@ -45,6 +378,7 @@ i32 gpusolve(cnf* c) {
mpz_export(solution + 1, NULL, -1, sizeof(u32), 0, 0, gmpmax);
mpz_clear(gmpmax);
// printf("%lu\n", wordcnt);
cl_int res = clGetPlatformIDs(1, &platformid, &numplatforms);
if (res != CL_SUCCESS) {
printf("Failed to retrieve OpenCL platform IDs\n");
@@ -58,6 +392,36 @@ i32 gpusolve(cnf* c) {
}
// printf("Found %u devices\n", numdevices);
u64 memoryMax = 0;
res = clGetDeviceInfo(deviceid, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(cl_ulong), &memoryMax, NULL);
if (res != CL_SUCCESS) {
printf("Failed to query GPU memory\n");
exit(1);
}
u64 localMax = 0;
res = clGetDeviceInfo(deviceid, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(cl_ulong), &localMax, NULL);
if (res != CL_SUCCESS) {
printf("Failed to query GPU memory\n");
exit(1);
}
u64 allocMax = 0;
res = clGetDeviceInfo(deviceid, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(cl_ulong), &allocMax, NULL);
if (res != CL_SUCCESS) {
printf("Failed to query GPU memory\n");
exit(1);
}
printf("GPU mem: %lu %lu %lu\n", memoryMax, localMax, allocMax);
size_t computeUnits = 0;
res = clGetDeviceInfo(deviceid, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(size_t), &computeUnits, NULL);
if (res != CL_SUCCESS) {
printf("Failed to query GPU memory\n");
exit(1);
}
printf("Compute Units: %lu\n", computeUnits);
cl_context context = clCreateContext(NULL, 1, &deviceid, NULL, NULL, &res);
if (res != CL_SUCCESS) {
printf("Failed to create OpenCL context\n");
@@ -118,6 +482,12 @@ i32 gpusolve(cnf* c) {
exit(1);
}
cl_mem gpuscratchpad = clCreateBuffer(context, CL_MEM_READ_WRITE, 2 * wordcnt * GLOBAL_SIZE * sizeof(cl_uint), NULL, &res);
if (res != CL_SUCCESS) {
printf("Failed to create output buffer\n");
exit(1);
}
// Load buffers to GPU
res = clEnqueueWriteBuffer(commqueue, gpuheader, CL_TRUE, 0, 2 * sizeof(cl_uint), c->cnts, 0, NULL, NULL);
if (res != CL_SUCCESS) {
@@ -176,7 +546,7 @@ i32 gpusolve(cnf* c) {
size_t maxworkgrpu = 0;
res = clGetKernelWorkGroupInfo(kernel, deviceid, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &maxworkgrpu, NULL);
// printf("Max work group size: %lu\n", maxworkgrpu);
printf("Max work group size: %lu\n", maxworkgrpu);
res = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void*) &gpuheader);
res = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void*) &gpulvars);
@@ -185,7 +555,7 @@ i32 gpusolve(cnf* c) {
res = clSetKernelArg(kernel, 4, sizeof(cl_mem), (void*) &gpuoutput);
res = clSetKernelArg(kernel, 5, 2 * wordcnt * sizeof(cl_uint) * LOCAL_SIZE, NULL);
res = clSetKernelArg(kernel, 5, sizeof(cl_mem), (void*) &gpuscratchpad);
// u64 starttime = utime();
size_t itemsize[2] = {GLOBAL_SIZE, LOCAL_SIZE };

View File

@@ -1,4 +1,27 @@
#pragma once
#include "ncnf.h"
#include <CL/cl.h>
typedef struct {
cl_platform_id platformid;
cl_device_id deviceid;
cl_uint numdevices;
cl_uint numplatforms;
char* source_str;
size_t source_size;
cl_context ctx;
cl_command_queue commqueue;
cl_program program;
cl_kernel kernel;
u64 gpuMemoryMax;
u64 gpuLocalMax;
u64 gpuAllocMax;
u64 gpuCUs;
} gpusolver;
gpusolver* initSolver();
i32 gpusolve2(gpusolver* gs, cnf* c);
void freeSolver(gpusolver* gs);
i32 gpusolve(cnf* c);

226
main.c
View File

@@ -2,67 +2,13 @@
#include "gpusolver.h"
#include "time.h"
#include "tests/masterTest.h"
#include "gmp.h"
#include "rng.h"
#include "ncnf.h"
#define ADD (0)
#define CMP (1)
#define CHK (2)
#include "cpusolver.h"
void ctrthings2(cnf* c, u32* state, u32* ctr, u32* max) {
u32 wcnt = 1U + (c->cnts[0] >> 5U);
u32* mode = state;
u32* index = state + 1;
u32* addval = state + 2;
u32 varcnt = c->cnts[0] - 1;
u32 chkmsk = 0xFFFFFFFFU * (*mode == CHK);
u32 chkcls = *index & chkmsk;
u32 chkind = c->clausedat[3 * chkcls] + (*addval & chkmsk);
u32 var = c->variables[chkind];
u8 par = c->parities[chkind];
u32 vword = (varcnt - var) >> 5U;
u32 vbit = (varcnt - var) & 0b11111U;
u8 corpar = (ctr[vword] >> vbit) & 1U;
u8 isvalid = (par == corpar);
u8 islvar = ((*addval + 1) == c->clausedat[3 * chkcls + 1]);
u8 isbchk0 = (*mode == CHK);
u8 isbchk1 = isbchk0 & isvalid;
u8 isbchk2 = isbchk1 & islvar;
u32 j = c->clausedat[3 * chkcls + 2];
*mode -= 2 * isbchk2;
*index = (j >> 5U) * isbchk2 + *index * (!isbchk2);
*addval = (1U << (j & 0b11111U)) * isbchk2 + *addval * (!isbchk2);
*addval += ((isbchk1) & (!islvar));
u8 isbchk3 = (isbchk0 & (!isvalid));
*addval *= (!isbchk3);
*index += (isbchk3);
u8 issat = (*index == c->cnts[1]) * (isbchk3);
u32 cmpaddind = *index * (*mode != CHK);
u32 nval = ctr[cmpaddind] + *addval; // Find the result of the current step if it was addition
*addval = (nval < ctr[cmpaddind]) * (*mode == ADD) + (*addval) * (*mode == CHK); // If in add mode, set addval to carry. If in cmp mode, set to 0. If in check mode, leave alone.
ctr[cmpaddind] = nval * ((*mode == ADD) & !issat) + ctr[cmpaddind] * ((*mode != ADD) | issat); // If in add mode, set new ctr val, otherwise leave unchanged
*addval -= (ctr[cmpaddind] < max[cmpaddind]) * (*mode == CMP); // If in comparison mode, decrement addval if less than
*addval += (ctr[cmpaddind] > max[cmpaddind]) * (*mode == CMP); // If in comparison mode, increment addval if greater than
u8 addcond = (*addval == 0) | (cmpaddind == (wcnt - 1)); // Exit condition for the ADD state: If addval is zero (no carry) or we're at the last word
u8 cmpcond = (*addval != 0) | (cmpaddind == 0); // Exit condition for the CMP state: if addval is nonzero (lt or gt) or we're at the least significant word
u8 exittime = (*mode == CMP) & cmpcond & (*addval != -1);
exittime |= issat;
if (exittime) { // If in cmpmode and the comparison result is not less than, unsat
printf("Result: %u\n", issat);
*mode = 4;
return;
}
u8 cmpdone = cmpcond & (*mode == CMP); // if comparison completion conditions are satisfied and in CMP mode
u32 addindex = (cmpaddind + 1) * !addcond + (wcnt - 1) * addcond; // if add completion is satisfied, set index to most significant word, else increment by 1
*index = addindex * (*mode == ADD) + (*index - (*mode == CMP)) * (*mode != ADD); // If in add mode, use addindex; if in cmp mode, decrement index by 1
*index *= !cmpdone;
*addval *= !(((addcond) & (*mode == ADD)) | cmpdone); // If add is complete, or cmp is complete, zero. Else leave unchanged.
*mode += addcond * (*mode == ADD) + cmpdone; // If in add mode and add completion is reached, increment mode. If in cmp mode and cmp completion reached, increment mode.
}
void printbits(unsigned a) {
for (unsigned i = 0; i < 32; ++i) {
@@ -105,123 +51,41 @@ void mul(u32* c, u32 len, u32* a, u32 b) {
}
}
i32 runuf20lol() {
u32 passed = 0;
u64 tottime = 0;
for (u32 i = 0; i < 1000; ++i) {
char buf[128];
i32 len = sprintf(buf, "/home/lev/Downloads/uf20/uf20-0%u.cnf", i + 1);
debugsolve(buf);
passed++;
}
return 1;
}
i32 runuf50lol() {
u32 passed = 0;
u64 tottime = 0;
for (u32 i = 0; i < 1000; ++i) {
char buf[128];
i32 len = sprintf(buf, "/home/lev/Downloads/uf50/uf50-0%u.cnf", i + 1);
debugsolve(buf);
passed++;
}
return 1;
}
int main() {
/*
printf("Tests: %lu\n", TESTS);
rngstate rng;
u64 rseed = utime();
seed(&rng, rseed);
printf("Seed: %lu\n", rseed);
mpz_t a, b, c, d, e;
mpz_inits(a, b, c, d, e, NULL);
u32 ctrp[CSZE];
u32 maxp[CSZE];
u32* ctr = ctrp;
u32* max = maxp;
u32 state[3];
u32 hdr[2];
char buf[4096];
for (u64 i = 0; i < TESTS; ++i) {
memset(ctr, 0, sizeof(u32) * CSZE);
memset(max, 0, sizeof(u32) * CSZE);
//u32 lenval = ru32(&rng) % CSZE;
u32 lenval = CSZE;
if (lenval < 2) lenval = 2;
u32 jval = ru32(&rng) % ((lenval - 1) * 32);
for (u32 j = 0; j < lenval - 2; ++j) {
ctr[j] = ru32(&rng);
max[j] = ru32(&rng);
}
mpz_import(a, lenval, -1, sizeof(u32), 0, 0, ctr);
mpz_import(b, lenval, -1, sizeof(u32), 0, 0, max);
state[1] = jval >> 5U;
state[2] = jval & 0b11111U;
state[2] = 1U << state[2];
mpz_ui_pow_ui(c, 2, jval);
if (rf32(&rng) < eqprob && mpz_cmp) {
mpz_sub(a, b, c);
if (mpz_sgn(a) != -1) {
mpz_export(ctr, NULL, -1, sizeof(u32), 0, 0, a);
} else {
mpz_import(a, lenval, -1, sizeof(u32), 0, 0, ctr);
}
}
mpz_add(d, a, c);
state[0] = 0;
while (state[0] < 2U) {
ctrthings2(hdr, lenval, state, ctr, max);
}
mpz_import(c, lenval, -1, sizeof(u32), 0, 0, ctr);
i32 res = mpz_cmp(d, b);
if (res == -1) {
if (state[2] != -1) {
printf("Fuck2 %lu\n", i);
printf("d: ");
mpz_out_str(stdout, 10, d);
printf("\nb: ");
mpz_out_str(stdout, 10, b);
printf("\n");
printf("mode: %u\n", state[2]);
exit(0);
}
} else {
if (state[2] != 2) {
printf("Fuck3 %lu\n", i);
printf("d: ");
mpz_out_str(stdout, 10, d);
printf("\nc: ");
mpz_out_str(stdout, 10, c);
printf("\nb: ");
mpz_out_str(stdout, 10, b);
printf("\n");
printf("mode: %u\n", state[2]);
exit(0);
}
}
res = mpz_cmp(d, c);
if (res != 0) {
printf("Fuck %lu\na: ", i);
mpz_out_str(stdout, 10, a);
printf("\nd: ");
mpz_out_str(stdout, 10, d);
printf("\nc: ");
mpz_out_str(stdout, 10, c);
printf("\n%u %u\n", lenval, jval);
exit(0);
}
}
mpz_clears(a, b, c, d, e, NULL);
*/
// debugsolve("/home/lev/Downloads/uf50/uf50-01.cnf");
// runuf20lol();
// runTests();
/*
srand( utime());
@@ -250,19 +114,29 @@ int main() {
/*
cnf* c = readDIMACS("/home/lev/Downloads/logistics/logistics.d.cnf");
cnf* c = readDIMACS("/home/heka/Downloads/uf20/uf20-03.cnf");
sortlastnum(c);
printf("%u\n", c->cnts[0]);
// gpusolve(c);
gpusolve(c);
gpusolver* gs = initSolver();
if (gs == NULL) return -1;
i32 res = gpusolve2(gs, c);
freeSolver(gs);
freecnf(c);
return 0;
*/
runTests();
return 0;
// runTests();
// return 0;
/*
// printf("%u\n", c->litcnt);

27
ncnf.c
View File

@@ -60,6 +60,8 @@ cnf* readDIMACS(char* path) {
c->clausedat = calloc(*clausecnt, sizeof(u32) * 3);
CHECK(c->clausedat, "Failed to allocate clause data\n")
c->index = calloc(*varcnt, sizeof(u32));
CHECK(c->index, "Failed to allocate clause index\n")
c->variables = calloc(cap, sizeof(u32));
CHECK(c->variables, "Failed to allocate literal variables\n")
c->parities = calloc(cap, sizeof(u8));
@@ -208,4 +210,29 @@ void sortlastnum(cnf* c) {
}
free(d);
// TODO: Rewrite, the following is copied from Gábor Kusper's implementation
u32 lastNum = 0;
for (u32 i = c->cnts[1] - 1; i < c->cnts[1]; --i) {
if (c->clausedat[3 * i + 2] > lastNum) {
while (lastNum < c->clausedat[3 * i + 2]) {
c->index[lastNum] = i + 1;
lastNum++;
}
}
}
u32 corrInd = 0;
while (c->index[corrInd] == c->cnts[1]) {
corrInd++;
}
if (corrInd != 0) {
u32 goodVal = c->index[corrInd];
do {
corrInd--;
c->index[corrInd] = goodVal;
} while (corrInd != 0);
}
}

5
ncnf.h
View File

@@ -12,6 +12,7 @@
typedef struct {
u32 cnts[3]; // { varcnt, clausecnt }
u32* clausedat; // { ind, len, jval }
u32* index;
u32* variables;
u8* parities;
} cnf;
@@ -23,3 +24,7 @@ void printcnf(cnf* c);
void sortlastnum(cnf* c);
void freecnf(cnf* c);
/* -mavx2 -O3 -ftree-loop-linear -ftree-loop-im -ftree-loop-ivcanon -fivopts -ftree-vectorize -ftracer -funroll-all-loops
*
*/

22
psat.cl
View File

@@ -61,9 +61,7 @@ void mul(uint* c, uint len, uint* a, uint b) {
}
}
__kernel void vectorSAT(__global const uint* cnfhdr, __global const uint* clausedat, __global const uint* vars, __global const uchar* pars, __global uint* output, __local uint* lctrs) {
uint locid = get_local_id(0);
uint locsz = get_local_size(0);
__kernel void vectorSAT(__global const uint* cnfhdr, __global const uint* clausedat, __global const uint* vars, __global const uchar* pars, __global uint* output, __global uint* lctrs) {
// uint grpid = get_group_id(0);
// uint grpcn = get_num_groups(0);
uint globid = get_global_id(0);
@@ -75,22 +73,22 @@ __kernel void vectorSAT(__global const uint* cnfhdr, __global const uint* clause
uint index = 0;
uint addval = 0;
if (globid == 0) output[0] = 0;
uint* ctr = lctrs + wcnt * 2 * locid;
uint* max = lctrs + wcnt * (2 * locid + 1);
//uint ctroff =
uint* ctr = lctrs + wcnt * 2 * globid;
uint* max = lctrs + wcnt * (2 * globid + 1);
for (uint i = 0; i < wcnt; ++i) {
ctr[i] = max[i] = 0;
}
mul(ctr, wcnt, output + 1, globid);
if (globid == globsz) {
if (globid == globsz - 1) {
stateaddpow(wcnt, max, cnfhdr[0]);
} else {
mul(max, wcnt, output + 1, globid + 1);
}
//printf("%u %u\n", ctr[0], max[0]);
//printf("%u %u\n", wcnt * 2 * globid, wcnt * (2 * globid + 1));
barrier(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE);
@@ -113,7 +111,7 @@ __kernel void vectorSAT(__global const uint* cnfhdr, __global const uint* clause
uchar isbchk2 = isbchk1 & islvar;
uint j = clausedat[3 * chkcls + 2];
mode -= 2 * isbchk2;
// if (isbchk2) printf("j: %u\n", j);
//if (isbchk2 && globid == 0) printf("%u j: %u\n", globid, j);
index = (j >> 5U) * isbchk2 + index * (!isbchk2);
addval = (1U << (j & 0b11111U)) * isbchk2 + addval * (!isbchk2);
addval += ((isbchk1) & (!islvar));
@@ -140,9 +138,11 @@ __kernel void vectorSAT(__global const uint* cnfhdr, __global const uint* clause
}
output[0] = 1;
}
}
return;
}
//if (globid == 0) printf("fuck %u %u\n", ctr[0], max[0]);
break;
}
uchar cmpdone = cmpcond & (mode == 1); // if comparison completion conditions are satisfied and in CMP mode
uint addindex = (cmpaddind + 1) * !addcond + (wcnt - 1) * addcond; // if add completion is satisfied, set index to most significant word, else increment by 1
index = addindex * (mode == 0) + (index - (mode == 1)) * (mode != 0); // If in add mode, use addindex; if in cmp mode, decrement index by 1

278
psatv0.cl Normal file
View File

@@ -0,0 +1,278 @@
#pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable
#define DEBUG
#ifdef DEBUG
#define DBG(X) printf("%s\n", (X))
#else
#define DBG(X) do {} while (0)
#endif
static inline void printbits(uint a) {
for (uint i = 0; i < 32; ++i) {
uint ind = 31 - i;
printf("%u", (a >> ind) & 1U);
}
}
static inline void stateaddpow(uint wcnt, uint* state, uint pow) {
uint corpow = pow & 0b11111U;
uint startind = pow >> 5U;
uint tr = 1U << corpow;
uint tval = state[startind] + tr;
bool carry = tval < state[startind];
state[startind] = tval;
if (carry) {
uint i = startind + 1;
for ( ; i < wcnt; ++i) {
state[i]++;
if (state[i]) break;
}
}
}
/*
static inline void stateaddpow(uint wcnt, uint* state, uint pow) {
uint corpow = pow & 0b11111U;
uint startind = pow >> 5U;
uint tr = 1U << corpow;
uint tval = state[startind] + tr;
bool carry = tval < state[startind];
uint i = startind + 1;
for ( ; i < wcnt; ++i) {
state[i] = carry + state[i];
carry = (state[i] == 0) * carry;
}
//if (carry) {
//uint i = startind + 1;
//for ( ; i < wcnt; ++i) {
//state[i]++;
//if (state[i]) break;
//}
//}
}
*/
__kernel void vectorSAT(__global const uint* cnfheader, __global const uint* lvars, __global const uint* vars, __global const uint* clauses, __global const uchar* pars, __global uint* output, __global uchar* scratchpad, __local uint* maxvals) {
output[0] = 2;
__local uint setmax;
uint cnt = cnfheader[0];
uint vcnt = cnfheader[1];
uint ccnt = cnfheader[2];
uint wcnt = 1 + (vcnt >> 5U);
uint maxctr = 1U << (vcnt & 0b11111U);
uint glbid = get_global_id(0);
uint glbsz = get_global_size(0);
/*
uint locid = get_local_id(0);
uint locsz = get_local_size(0);
uint grpid = get_group_id(0);
uint grpcn = get_num_groups(0);
*/
// Zero out the counter
for (uint i = 0; i < wcnt; ++i) output[i + 1] = 0;
// Set all scratchpad clauses to true
for (uint j = 0; j < ccnt; j += glbsz) {
uchar cond = (j + glbid) < ccnt;
// If ptr would go past end of array, set it to last element
j = j * cond + (!cond) * (ccnt - glbid - 1);
scratchpad[j + glbid] = 1;
// if ((j + glbid) < ccnt)
}
bool done = false;
__local uint iter;
__local uint firstind[1];
iter = 0;
while (output[0] == 2) {
firstind[0] = ccnt;
if (glbid == 0) {
iter = iter + 1;
}
// if (glbid == 0) printf("%s\n", ":~");
setmax = 0;
uint maxnumx = 0;
for (uint j = 0; j < cnt; j += glbsz) {
uchar cond = (j + glbid) < cnt;
// Last element cap
j = j * cond + (!cond) * (cnt - glbid - 1);
uint varind = vars[j + glbid];
varind = (vcnt - 1) - varind;
uint iind = varind >> 5U;
uint bind = varind & 0b11111U;
uchar cpar = (output[iind + 1] >> bind) & 1U;
//uchar cond2 = (cpar != pars[j + glbid]);
//uint dirind = cond2 * clauses[j + glbid] + (!cond2) * ccnt;
// scratchpad[dirind] = 0;
if (cpar != pars[j + glbid]) {
scratchpad[clauses[j + glbid]] = 0;
/*
if (clauses[j + glbid] == 50) {
printf("50 failed on (%u) %u (%u) %u / %u %u\n", pars[j + glbid], vars[j + glbid], cpar, varind, iind, bind);
printbits(output[2]);
printbits(output[1]);
}
*/
}
/*
if ((j + glbid) < cnt) {
uint varind = vars[j + glbid];
varind = (vcnt - 1) - varind;
uint iind = varind >> 5U;
uint bind = varind & 0b11111U;
uchar cpar = (output[iind + 1] >> bind) & 1U;
if (cpar != pars[j + glbid]) {
scratchpad[clauses[j + glbid]] = 0;
// printf("z %u %u\n", j + glbid, clauses[j + glbid]);
}
}
*/
}
barrier(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE);
for (uint j = 0; j < ccnt; j += glbsz) {
/*
if (((j + glbid) < ccnt) && (scratchpad[j + glbid] == 1)) {
setmax = 1;
// printf("%u\n", (~output[1]) & 0b11111);
// printf("%u%u%u%u%u\n", (output[1] >> 4) & 1U, (output[1] >> 3) & 1U, (output[1] >> 2) & 1U, (output[1] >> 1) & 1U, output[1] & 1U);
// printf("%u - %u\n", j + glbid, scratchpad[j + glbid]);
}*/
//uchar cond = (j + glbid) < ccnt;
// Last element cap
// j = j * cond + (!cond) * (ccnt - glbid - 1);
if (scratchpad[j + glbid] == 1 && (j + glbid) < ccnt) {
setmax = 1;
// printf("s %u %u %u\n", j + glbid, lvars[j + glbid], iter);
// printf("c %u %u %u\n", j + glbid, scratchpad[j + glbid], lvars[j + glbid]);
}
}
barrier(CLK_LOCAL_MEM_FENCE);
/*
if (glbid == 0) {
printf(">> %u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u\n", (output[1] >> 19) & 1U,(output[1] >> 18) & 1U, (output[1] >> 17) & 1U, (output[1] >> 16) & 1U, (output[1] >> 15) & 1U, (output[1] >> 14) & 1U, (output[1] >> 13) & 1U, (output[1] >> 12) & 1U, (output[1] >> 11) & 1U, (output[1] >> 10) & 1U, (output[1] >> 9) & 1U, (output[1] >> 8) & 1U, (output[1] >> 7) & 1U, (output[1] >> 6) & 1U, (output[1] >> 5) & 1U, (output[1] >> 4) & 1U, (output[1] >> 3) & 1U, (output[1] >> 2) & 1U, (output[1] >> 1) & 1U, output[1] & 1U);
}
*/
if (setmax) {
// Set maxval array to zero
maxvals[glbid] = 0;
// Accumulate and reduce the maximums
for (uint j = 0; j < ccnt; j += glbsz) {
uint a = maxvals[glbid];
uint b = lvars[j + glbid];
uint c = max(a, b);
//uchar cond = (j + glbid) < ccnt;
//uint d = glbid * cond + (!cond) * glbsz;
//maxvals[d] = c;
if ((j + glbid) < ccnt && scratchpad[j + glbid] == 1) {
maxvals[glbid] = c;
// printf("min %u\n", j + glbid);
// atomic_min(firstind, (j + glbid));
}
}
/*
if (glbid == 0) {
for (uint k = 0; k < ccnt; ++k) {
if (scratchpad[k] == 1) {
printf("s %u %u\n", k, lvars[k]); //, iter);
}
}
printf("z %u %u\n", scratchpad[50], scratchpad[199]);
}
*/
barrier(CLK_LOCAL_MEM_FENCE);
// uint maxj = lvars[firstind[0]];
// Set all scratchpad clauses to true
for (uint j = 0; j < ccnt; j += glbsz) {
uchar cond = (j + glbid) < ccnt;
// If ptr would go past end of array, set it to last element
j = j * cond + (!cond) * (ccnt - glbid - 1);
scratchpad[j + glbid] = 1;
// if ((j + glbid) < ccnt)
}
// Final reduction pass
uint maxj = maxvals[0];
for (uint j = 1; j < glbsz; ++j) {
maxj = max(maxj, maxvals[j]);
}
// Add to the counter
if (glbid == 0) {
// printf("> %u\n", maxj);
/*
printbits(output[2]);
printbits(output[1]);
printf("%s", "\n");
*/
// uint temp = ind;
// printf("maxNumx = %u\n", maxj);// firstind[0]);
stateaddpow(wcnt, output + 1, maxj);
/*
printbits(output[2]);
printbits(output[1]);
printf("%s", "\n");
*/
/*
if (output[2] == 60234U) {
printf("%s\n", "YEEDLEY DEET");
printbits(output[2]);
printbits(output[1]);
printf("%s", "\n");
}
*/
// printf(">> %u%u%u%u%u\n", (output[1] >> 4) & 1U, (output[1] >> 3) & 1U, (output[1] >> 2) & 1U, (output[1] >> 1) & 1U, output[1] & 1U);
}
// Check counter for overflow
if (output[wcnt] >= maxctr) {
output[0] = 1;
}
} else {
// SAT. Set status and assignment.
output[0] = 0;
if (glbid == 0) {
for (uint i = 0; i < wcnt; ++i) output[i + 1] = ~output[i + 1];
}
}
barrier(CLK_LOCAL_MEM_FENCE);
}
}

View File

@@ -3,6 +3,7 @@
#include "../cnf.h"
#include "../gpusolver.h"
#include "../time.h"
#include "../cpusolver.h"
i32 runTests() {
i32 res = runuf20();
@@ -24,7 +25,7 @@ i32 runTests() {
i32 runuf20() {
// printf("Running against uf20\n");
printf("Running against uf20\n");
u32 passed = 0;
u64 tottime = 0;
for (u32 i = 0; i < 1000; ++i) {
@@ -36,21 +37,21 @@ i32 runuf20() {
sortlastnum(c);
u64 start = utime();
i32 res = gpusolve(c);
i32 res = cpusolve(c);
u64 stop = utime();
tottime += (stop - start);
freecnf(c);
if (res == 1) passed++;
}
// printf("Passed %u / 1000 tests\n", passed);
// printf("Took %lf s total, %lf s on avg\n", ((f64) tottime) / 1000000.0, ((f64) tottime) / 1000000000.0);
printf("Passed %u / 1000 tests\n", passed);
printf("Took %lf s total, %lf s on avg\n", ((f64) tottime) / 1000000.0, ((f64) tottime) / 1000000000.0);
if (passed == 1000) return 0;
return 1;
}
i32 runuf50() {
// printf("Running against uf50\n");
printf("Running against uf50\n");
u32 passed = 0;
u64 tottime = 0;
for (u32 i = 0; i < 1000; ++i) {
@@ -62,21 +63,21 @@ i32 runuf50() {
sortlastnum(c);
u64 start = utime();
i32 res = gpusolve(c);
i32 res = cpusolve(c);
u64 stop = utime();
tottime += (stop - start);
freecnf(c);
if (res == 1) passed++;
}
// printf("Passed %u / 1000 tests\n", passed);
// printf("Took %lf s total, %lf s on avg\n", ((f64) tottime) / 1000000.0, ((f64) tottime) / 1000000000.0);
printf("Passed %u / 1000 tests\n", passed);
printf("Took %lf s total, %lf s on avg\n", ((f64) tottime) / 1000000.0, ((f64) tottime) / 1000000000.0);
if (passed == 1000) return 0;
return 1;
}
i32 runuuf50() {
// printf("Running against uuf50\n");
printf("Running against uuf50\n");
u32 passed = 0;
u64 tottime = 0;
for (u32 i = 0; i < 1000; ++i) {
@@ -88,15 +89,15 @@ i32 runuuf50() {
sortlastnum(c);
u64 start = utime();
i32 res = gpusolve(c);
i32 res = cpusolve(c);
u64 stop = utime();
tottime += (stop - start);
freecnf(c);
if (res == 0) passed++;
}
// printf("Passed %u / 1000 tests\n", passed);
// printf("Took %lf s total, %lf s on avg\n", ((f64) tottime) / 1000000.0, ((f64) tottime) / 1000000000.0);
printf("Passed %u / 1000 tests\n", passed);
printf("Took %lf s total, %lf s on avg\n", ((f64) tottime) / 1000000.0, ((f64) tottime) / 1000000000.0);
if (passed == 1000) return 0;
return 1;
}