#pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable #define DEBUG #ifdef DEBUG #define DBG(X) printf("%s\n", (X)) #else #define DBG(X) do {} while (0) #endif static inline void printbits(uint a) { for (uint i = 0; i < 32; ++i) { uint ind = 31 - i; printf("%u", (a >> ind) & 1U); } } static inline void stateaddpow(uint wcnt, uint* state, uint pow) { uint corpow = pow & 0b11111U; uint startind = pow >> 5U; uint tr = 1U << corpow; uint tval = state[startind] + tr; bool carry = tval < state[startind]; state[startind] = tval; if (carry) { uint i = startind + 1; for ( ; i < wcnt; ++i) { state[i]++; if (state[i]) break; } } } /* static inline void stateaddpow(uint wcnt, uint* state, uint pow) { uint corpow = pow & 0b11111U; uint startind = pow >> 5U; uint tr = 1U << corpow; uint tval = state[startind] + tr; bool carry = tval < state[startind]; uint i = startind + 1; for ( ; i < wcnt; ++i) { state[i] = carry + state[i]; carry = (state[i] == 0) * carry; } //if (carry) { //uint i = startind + 1; //for ( ; i < wcnt; ++i) { //state[i]++; //if (state[i]) break; //} //} } */ __kernel void vectorSAT(__global const uint* cnfheader, __global const uint* lvars, __global const uint* vars, __global const uint* clauses, __global const uchar* pars, __global uint* output, __global uchar* scratchpad, __local uint* maxvals) { output[0] = 2; __local uint setmax; uint cnt = cnfheader[0]; uint vcnt = cnfheader[1]; uint ccnt = cnfheader[2]; uint wcnt = 1 + (vcnt >> 5U); uint maxctr = 1U << (vcnt & 0b11111U); uint glbid = get_global_id(0); uint glbsz = get_global_size(0); /* uint locid = get_local_id(0); uint locsz = get_local_size(0); uint grpid = get_group_id(0); uint grpcn = get_num_groups(0); */ // Zero out the counter for (uint i = 0; i < wcnt; ++i) output[i + 1] = 0; // Set all scratchpad clauses to true for (uint j = 0; j < ccnt; j += glbsz) { uchar cond = (j + glbid) < ccnt; // If ptr would go past end of array, set it to last element j = j * cond + (!cond) * (ccnt - glbid - 1); scratchpad[j + glbid] = 1; // if ((j + glbid) < ccnt) } bool done = false; __local uint iter; __local uint firstind[1]; iter = 0; while (output[0] == 2) { firstind[0] = ccnt; if (glbid == 0) { iter = iter + 1; } // if (glbid == 0) printf("%s\n", ":~"); setmax = 0; uint maxnumx = 0; for (uint j = 0; j < cnt; j += glbsz) { uchar cond = (j + glbid) < cnt; // Last element cap j = j * cond + (!cond) * (cnt - glbid - 1); uint varind = vars[j + glbid]; varind = (vcnt - 1) - varind; uint iind = varind >> 5U; uint bind = varind & 0b11111U; uchar cpar = (output[iind + 1] >> bind) & 1U; //uchar cond2 = (cpar != pars[j + glbid]); //uint dirind = cond2 * clauses[j + glbid] + (!cond2) * ccnt; // scratchpad[dirind] = 0; if (cpar != pars[j + glbid]) { scratchpad[clauses[j + glbid]] = 0; /* if (clauses[j + glbid] == 50) { printf("50 failed on (%u) %u (%u) %u / %u %u\n", pars[j + glbid], vars[j + glbid], cpar, varind, iind, bind); printbits(output[2]); printbits(output[1]); } */ } /* if ((j + glbid) < cnt) { uint varind = vars[j + glbid]; varind = (vcnt - 1) - varind; uint iind = varind >> 5U; uint bind = varind & 0b11111U; uchar cpar = (output[iind + 1] >> bind) & 1U; if (cpar != pars[j + glbid]) { scratchpad[clauses[j + glbid]] = 0; // printf("z %u %u\n", j + glbid, clauses[j + glbid]); } } */ } barrier(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE); for (uint j = 0; j < ccnt; j += glbsz) { /* if (((j + glbid) < ccnt) && (scratchpad[j + glbid] == 1)) { setmax = 1; // printf("%u\n", (~output[1]) & 0b11111); // printf("%u%u%u%u%u\n", (output[1] >> 4) & 1U, (output[1] >> 3) & 1U, (output[1] >> 2) & 1U, (output[1] >> 1) & 1U, output[1] & 1U); // printf("%u - %u\n", j + glbid, scratchpad[j + glbid]); }*/ //uchar cond = (j + glbid) < ccnt; // Last element cap // j = j * cond + (!cond) * (ccnt - glbid - 1); if (scratchpad[j + glbid] == 1 && (j + glbid) < ccnt) { setmax = 1; // printf("s %u %u %u\n", j + glbid, lvars[j + glbid], iter); // printf("c %u %u %u\n", j + glbid, scratchpad[j + glbid], lvars[j + glbid]); } } barrier(CLK_LOCAL_MEM_FENCE); /* if (glbid == 0) { printf(">> %u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u\n", (output[1] >> 19) & 1U,(output[1] >> 18) & 1U, (output[1] >> 17) & 1U, (output[1] >> 16) & 1U, (output[1] >> 15) & 1U, (output[1] >> 14) & 1U, (output[1] >> 13) & 1U, (output[1] >> 12) & 1U, (output[1] >> 11) & 1U, (output[1] >> 10) & 1U, (output[1] >> 9) & 1U, (output[1] >> 8) & 1U, (output[1] >> 7) & 1U, (output[1] >> 6) & 1U, (output[1] >> 5) & 1U, (output[1] >> 4) & 1U, (output[1] >> 3) & 1U, (output[1] >> 2) & 1U, (output[1] >> 1) & 1U, output[1] & 1U); } */ if (setmax) { // Set maxval array to zero maxvals[glbid] = 0; // Accumulate and reduce the maximums for (uint j = 0; j < ccnt; j += glbsz) { uint a = maxvals[glbid]; uint b = lvars[j + glbid]; uint c = max(a, b); //uchar cond = (j + glbid) < ccnt; //uint d = glbid * cond + (!cond) * glbsz; //maxvals[d] = c; if ((j + glbid) < ccnt && scratchpad[j + glbid] == 1) { maxvals[glbid] = c; // printf("min %u\n", j + glbid); // atomic_min(firstind, (j + glbid)); } } /* if (glbid == 0) { for (uint k = 0; k < ccnt; ++k) { if (scratchpad[k] == 1) { printf("s %u %u\n", k, lvars[k]); //, iter); } } printf("z %u %u\n", scratchpad[50], scratchpad[199]); } */ barrier(CLK_LOCAL_MEM_FENCE); // uint maxj = lvars[firstind[0]]; // Set all scratchpad clauses to true for (uint j = 0; j < ccnt; j += glbsz) { uchar cond = (j + glbid) < ccnt; // If ptr would go past end of array, set it to last element j = j * cond + (!cond) * (ccnt - glbid - 1); scratchpad[j + glbid] = 1; // if ((j + glbid) < ccnt) } // Final reduction pass uint maxj = maxvals[0]; for (uint j = 1; j < glbsz; ++j) { maxj = max(maxj, maxvals[j]); } // Add to the counter if (glbid == 0) { // printf("> %u\n", maxj); /* printbits(output[2]); printbits(output[1]); printf("%s", "\n"); */ // uint temp = ind; // printf("maxNumx = %u\n", maxj);// firstind[0]); stateaddpow(wcnt, output + 1, maxj); /* printbits(output[2]); printbits(output[1]); printf("%s", "\n"); */ /* if (output[2] == 60234U) { printf("%s\n", "YEEDLEY DEET"); printbits(output[2]); printbits(output[1]); printf("%s", "\n"); } */ // printf(">> %u%u%u%u%u\n", (output[1] >> 4) & 1U, (output[1] >> 3) & 1U, (output[1] >> 2) & 1U, (output[1] >> 1) & 1U, output[1] & 1U); } // Check counter for overflow if (output[wcnt] >= maxctr) { output[0] = 1; } } else { // SAT. Set status and assignment. output[0] = 0; if (glbid == 0) { for (uint i = 0; i < wcnt; ++i) output[i + 1] = ~output[i + 1]; } } barrier(CLK_LOCAL_MEM_FENCE); } }