Early subsumption complete
Signed-off-by: gothictomato <gothictomato@pm.me>
This commit is contained in:
192
psat.cl
192
psat.cl
@@ -32,120 +32,122 @@ static inline void stateaddpow(uint wcnt, uint* state, uint pow) {
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void vectorSAT(__global const uint* cnfheader, __global const uint* lvars, __global const uint* vars, __global const uint* clauses, __global const uchar* pars, __global uint* output, __global uchar* scratchpad, __local uint* maxvals) {
|
||||
output[0] = 2;
|
||||
void mul(uint* c, uint len, uint* a, uint b) {
|
||||
uint carry = 0;
|
||||
for (uint i = 0; i < len; ++i) {
|
||||
uint ncarry;
|
||||
uint blo = a[i] & 0xFFFFU;
|
||||
uint bhi = a[i] >> 16U;
|
||||
uint ilo = b & 0xFFFFU;
|
||||
uint ihi = b >> 16U;
|
||||
|
||||
__local uint setmax;
|
||||
*(c + i) = ilo * blo;
|
||||
uint b1 = ilo * bhi;
|
||||
uint c1 = ihi * blo;
|
||||
ncarry = ihi * bhi;
|
||||
|
||||
uint cnt = cnfheader[0];
|
||||
uint vcnt = cnfheader[1];
|
||||
uint ccnt = cnfheader[2];
|
||||
|
||||
uint wcnt = 1 + (vcnt >> 5U);
|
||||
|
||||
uint maxctr = 1U << (vcnt & 0b11111U);
|
||||
|
||||
//uint glbid = get_global_id(0);
|
||||
//uint glbsz = get_global_size(0);
|
||||
b1 += c1;
|
||||
ncarry += (b1 < c1) << 16U;
|
||||
uint bblo = b1 & 0xFFFFU;
|
||||
uint bbhi = b1 >> 16U;
|
||||
bblo <<= 16U;
|
||||
*(c + i) += bblo;
|
||||
uchar acarry = *(c + i) < bblo;
|
||||
|
||||
ncarry += bbhi + acarry;
|
||||
c[i] += carry;
|
||||
ncarry += c[i] < carry;
|
||||
carry = ncarry;
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void vectorSAT(__global const uint* cnfhdr, __global const uint* clausedat, __global const uint* vars, __global const uchar* pars, __global uint* output, __local uint* lctrs) {
|
||||
uint locid = get_local_id(0);
|
||||
uint locsz = get_local_size(0);
|
||||
// uint grpid = get_group_id(0);
|
||||
// uint grpcn = get_num_groups(0);
|
||||
uint globid = get_global_id(0);
|
||||
uint globsz = get_global_size(0);
|
||||
|
||||
// Zero out the counter
|
||||
for (uint i = 0; i < wcnt; ++i) output[i + 1] = 0;
|
||||
uint wcnt = 1U + (cnfhdr[0] >> 5U);
|
||||
|
||||
// Set all scratchpad clauses to true
|
||||
for (uint j = 0; j < ccnt; j += locsz) {
|
||||
uchar cond = (j + locid) < ccnt;
|
||||
j = j * cond + (!cond) * (ccnt - locid - 1);
|
||||
scratchpad[j + locid] = 1;
|
||||
uint mode = 2;
|
||||
uint index = 0;
|
||||
uint addval = 0;
|
||||
|
||||
|
||||
if (globid == 0) output[0] = 0;
|
||||
|
||||
uint* ctr = lctrs + wcnt * 2 * locid;
|
||||
uint* max = lctrs + wcnt * (2 * locid + 1);
|
||||
|
||||
for (uint i = 0; i < wcnt; ++i) {
|
||||
ctr[i] = max[i] = 0;
|
||||
}
|
||||
|
||||
__local uint firstind[1];
|
||||
while (output[0] == 2) {
|
||||
firstind[0] = ccnt;
|
||||
|
||||
setmax = 0;
|
||||
uint maxnumx = 0;
|
||||
|
||||
for (uint j = 0; j < cnt; j += locsz) {
|
||||
|
||||
uchar cond = (j + locid) < cnt;
|
||||
// Last element cap
|
||||
j = j * cond + (!cond) * (cnt - locid - 1);
|
||||
uint varind = vars[j + locid];
|
||||
varind = (vcnt - 1) - varind;
|
||||
uint iind = varind >> 5U;
|
||||
uint bind = varind & 0b11111U;
|
||||
uchar cpar = (output[iind + 1] >> bind) & 1U;
|
||||
|
||||
if (cpar != pars[j + locid]) {
|
||||
scratchpad[clauses[j + locid]] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE);
|
||||
mul(ctr, wcnt, output + 1, globid);
|
||||
if (globid == globsz) {
|
||||
stateaddpow(wcnt, max, cnfhdr[0]);
|
||||
} else {
|
||||
mul(max, wcnt, output + 1, globid + 1);
|
||||
}
|
||||
|
||||
|
||||
for (uint j = 0; j < ccnt; j += locsz) {
|
||||
if (scratchpad[j + locid] == 1 && (j + locid) < ccnt) {
|
||||
setmax = 1;
|
||||
}
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE);
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
uint varcnt = cnfhdr[0] - 1;
|
||||
while (output[0] == 0) {
|
||||
|
||||
if (setmax) {
|
||||
// Set maxval array to zero
|
||||
maxvals[locid] = 0;
|
||||
uint chkmsk = 0xFFFFFFFFU * (mode == 2U);
|
||||
uint chkcls = index & chkmsk;
|
||||
uint chkind = clausedat[3 * chkcls] + (addval & chkmsk);
|
||||
uint var = vars[chkind];
|
||||
uchar par = pars[chkind];
|
||||
uint vword = (varcnt - var) >> 5U;
|
||||
uint vbit = (varcnt - var) & 0b11111U;
|
||||
uchar corpar = (ctr[vword] >> vbit) & 1U;
|
||||
uchar isvalid = (par == corpar);
|
||||
uchar islvar = ((addval + 1) == clausedat[3 * chkcls + 1]);
|
||||
uchar isbchk0 = (mode == 2U);
|
||||
uchar isbchk1 = isbchk0 & isvalid;
|
||||
uchar isbchk2 = isbchk1 & islvar;
|
||||
uint j = clausedat[3 * chkcls + 2];
|
||||
mode -= 2 * isbchk2;
|
||||
// if (isbchk2) printf("j: %u\n", j);
|
||||
index = (j >> 5U) * isbchk2 + index * (!isbchk2);
|
||||
addval = (1U << (j & 0b11111U)) * isbchk2 + addval * (!isbchk2);
|
||||
addval += ((isbchk1) & (!islvar));
|
||||
uchar isbchk3 = (isbchk0 & (!isvalid));
|
||||
addval *= (!isbchk3);
|
||||
index += (isbchk3);
|
||||
uchar issat = (index == cnfhdr[1]) * (isbchk3);
|
||||
|
||||
// Accumulate and reduce the maximums
|
||||
for (uint j = 0; j < ccnt; j += locsz) {
|
||||
//uint a = maxvals[locid];
|
||||
//uint b = lvars[j + locid];
|
||||
// uint c = max(a, b);
|
||||
if ((j + locid) < ccnt && scratchpad[j + locid] == 1) {
|
||||
//maxvals[locid] = c;
|
||||
atomic_min(firstind, (j + locid));
|
||||
uint cmpaddind = index * (mode != 2U);
|
||||
uint nval = ctr[cmpaddind] + addval; // Find the result of the current step if it was addition
|
||||
addval = (nval < ctr[cmpaddind]) * (mode == 0) + (addval) * (mode == 2U); // If in add mode, set addval to carry. If in cmp mode, set to 0. If in check mode, leave alone.
|
||||
ctr[cmpaddind] = nval * ((mode == 0) & !issat) + ctr[cmpaddind] * ((mode != 0) | issat); // If in add mode, set new ctr val, otherwise leave unchanged
|
||||
addval -= (ctr[cmpaddind] < max[cmpaddind]) * (mode == 1); // If in comparison mode, decrement addval if less than
|
||||
addval += (ctr[cmpaddind] > max[cmpaddind]) * (mode == 1); // If in comparison mode, increment addval if greater than
|
||||
uchar addcond = (addval == 0) | (cmpaddind == (wcnt - 1)); // Exit condition for the ADD state: If addval is zero (no carry) or we're at the last word
|
||||
uchar cmpcond = (addval != 0) | (cmpaddind == 0); // Exit condition for the CMP state: if addval is nonzero (lt or gt) or we're at the least significant word
|
||||
uchar exittime = (mode == 1) & cmpcond & (addval != -1);
|
||||
exittime |= issat;
|
||||
if (exittime) { // If in cmpmode and the comparison result is not less than, unsat
|
||||
if (issat) {
|
||||
if (atomic_cmpxchg(output, 0, 1) == 0) {
|
||||
for (uint i = 0; i < wcnt; ++i) {
|
||||
output[i + 1] = ~ctr[i];
|
||||
}
|
||||
output[0] = 1;
|
||||
}
|
||||
}
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
uint maxj = lvars[firstind[0]];
|
||||
|
||||
// Set all scratchpad clauses to true
|
||||
for (uint j = 0; j < ccnt; j += locsz) {
|
||||
uchar cond = (j + locid) < ccnt;
|
||||
j = j * cond + (!cond) * (ccnt - locid - 1);
|
||||
scratchpad[j + locid] = 1;
|
||||
}
|
||||
|
||||
// Final reduction pass
|
||||
/*
|
||||
uint maxj = maxvals[0];
|
||||
for (uint j = 1; j < locsz; ++j) {
|
||||
maxj = max(maxj, maxvals[j]);
|
||||
}
|
||||
*/
|
||||
|
||||
// Add to the counter
|
||||
if (locid == 0) {
|
||||
stateaddpow(wcnt, output + 1, maxj);
|
||||
}
|
||||
|
||||
if (output[wcnt] >= maxctr) {
|
||||
output[0] = 1;
|
||||
}
|
||||
} else {
|
||||
output[0] = 0;
|
||||
if (locid == 0) {
|
||||
for (uint i = 0; i < wcnt; ++i) output[i + 1] = ~output[i + 1];
|
||||
}
|
||||
return;
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
uchar cmpdone = cmpcond & (mode == 1); // if comparison completion conditions are satisfied and in CMP mode
|
||||
uint addindex = (cmpaddind + 1) * !addcond + (wcnt - 1) * addcond; // if add completion is satisfied, set index to most significant word, else increment by 1
|
||||
index = addindex * (mode == 0) + (index - (mode == 1)) * (mode != 0); // If in add mode, use addindex; if in cmp mode, decrement index by 1
|
||||
index *= !cmpdone;
|
||||
addval *= !(((addcond) & (mode == 0)) | cmpdone); // If add is complete, or cmp is complete, zero. Else leave unchanged.
|
||||
mode += addcond * (mode == 0) + cmpdone; // If in add mode and add completion is reached, increment mode. If in cmp mode and cmp completion reached, increment mode.
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user