Working to make OpenCL recognize the limitations of the device its on, as well as reusing the OpenCL kernel/context for multiple runs.

Signed-off-by: gothictomato <gothictomato@pm.me>
This commit is contained in:
gothictomato
2022-08-27 09:11:29 -04:00
parent 84374e23fd
commit e2b633c802
5 changed files with 424 additions and 16 deletions

View File

@@ -1,16 +1,346 @@
#include "gpusolver.h" #include "gpusolver.h"
#include <CL/cl.h>
#include "time.h" #include "time.h"
#include "gmp.h" #include "gmp.h"
#define LOCAL_SIZE (128) #define LOCAL_SIZE (64)
#define GLOBAL_SIZE (1024) #define GLOBAL_SIZE (2048)
#define CHECKASGN (true) #define CHECKASGN (true)
#define DEBUG #define DEBUG
gpusolver* initSolver() {
gpusolver* o = calloc(1, sizeof(gpusolver));
if (o == NULL) return NULL;
o->platformid = NULL;
o->numplatforms = 0;
o->deviceid = NULL;
o->numdevices = 0;
FILE* fp = fopen("../psat.cl", "r");
if (!fp) {
fprintf(stderr, "Failed to load kernel\n");
// TODO: Cleanup
return NULL;
}
o->source_str = malloc(0x100000);
o->source_size = fread(o->source_str, 1, 0x100000, fp);
o->source_str = realloc(o->source_str, o->source_size + 1);
if (o->source_str == NULL) {
printf("Failed to reallocate source\n");
return NULL;
}
fclose(fp);
cl_int res = clGetPlatformIDs(1, &(o->platformid), &(o->numplatforms));
if (res != CL_SUCCESS) {
printf("Failed to retrieve OpenCL platform IDs\n");
// TODO: Cleanup
return NULL;
}
res = clGetDeviceIDs(o->platformid, CL_DEVICE_TYPE_GPU, 1, &(o->deviceid), &(o->numdevices));
if (res != CL_SUCCESS) {
printf("Failed to retrieve OpenCL device IDs\n");
// TODO: Cleanup
return NULL;
}
o->ctx = clCreateContext(NULL, 1, &(o->deviceid), NULL, NULL, &res);
if (res != CL_SUCCESS) {
printf("Failed to create OpenCL context\n");
// TODO: Cleanup
return NULL;
}
o->commqueue = clCreateCommandQueueWithProperties(o->ctx, o->deviceid, 0, &res);
if (res != CL_SUCCESS) {
printf("Failed to create OpenCL command queue\n");
// TODO: Cleanup
return NULL;
}
res = clGetDeviceInfo(o->deviceid, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(cl_ulong), &(o->gpuMemoryMax), NULL);
if (res != CL_SUCCESS) {
printf("Failed to query total GPU memory\n");
// TODO: CLeanup
return NULL;
}
res = clGetDeviceInfo(o->deviceid, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(cl_ulong), &(o->gpuLocalMax), NULL);
if (res != CL_SUCCESS) {
printf("Failed to query total GPU memory\n");
// TODO: CLeanup
return NULL;
}
res = clGetDeviceInfo(o->deviceid, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(cl_ulong), &(o->gpuAllocMax), NULL);
if (res != CL_SUCCESS) {
printf("Failed to query total GPU memory\n");
// TODO: CLeanup
return NULL;
}
res = clGetDeviceInfo(o->deviceid, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(cl_ulong), &(o->gpuCUs), NULL);
if (res != CL_SUCCESS) {
printf("Failed to query total GPU memory\n");
// TODO: Cleanup
return NULL;
}
o->program = clCreateProgramWithSource(o->ctx, 1, (const char**) &(o->source_str), (const size_t*) &(o->source_size), &res);
if (res != CL_SUCCESS) {
printf("Failed to create OpenCL program\n");
// TODO: Cleanup
exit(1);
}
res = clBuildProgram(o->program, 1, &(o->deviceid), NULL, NULL, NULL);
if (res != CL_SUCCESS) {
printf("Build failed\n");
// TODO: Cleanup
exit(1);
}
size_t loglen = 0;
res = clGetProgramBuildInfo(o->program, o->deviceid, CL_PROGRAM_BUILD_LOG, NULL, NULL, &loglen);
if (res != CL_SUCCESS) {
printf("Failed to retrieve build logs\n");
exit(1);
}
char* logbuf = malloc(sizeof(char) * loglen);
res = clGetProgramBuildInfo(o->program, o->deviceid, CL_PROGRAM_BUILD_LOG, sizeof(char) * loglen, logbuf, &loglen);
if (res != CL_SUCCESS) {
printf("Failed to retrieve build logs\n");
exit(1);
}
printf("%*.s\n", (int) loglen, logbuf);
free(logbuf);
o->kernel = clCreateKernel(o->program, "vectorSAT", &res);
if (res != CL_SUCCESS) {
printf("Failed to create kernel\n");
printf("%d\n", res);
// TODO: Cleanup
exit(1);
}
printf("Initialized solver:\n");
printf("\tCompute Units: %lu\n", o->gpuCUs);
printf("\tMax Global Memory: %lu\n", o->gpuMemoryMax);
printf("\tMax Local Memory: %lu\n", o->gpuLocalMax);
printf("\tMax Alloc Memory: %lu\n", o->gpuAllocMax);
return o;
}
i32 gpusolve2(gpusolver* gs, cnf* c) {
u32 wcnt = 1 + (c->cnts[0] >> 5U);
u32* solution = calloc((wcnt + 1), sizeof(u32));
if (solution == NULL) {
printf("Failed to allocate solution buffer\n");
exit(1);
}
mpz_t gmpmax;
mpz_init(gmpmax);
mpz_ui_pow_ui(gmpmax, 2, c->cnts[0]);
mpz_div_ui(gmpmax, gmpmax, gs->gpuCUs);
mpz_export(solution + 1, NULL, -1, sizeof(u32), 0, 0, gmpmax);
// mpz_out_str(stdout, 10, gmpmax);
// printf("\n\n");
mpz_clear(gmpmax);
cl_int res = 2;
cl_mem gpuheader = clCreateBuffer(gs->ctx, CL_MEM_READ_ONLY, 2 * sizeof(cl_uint), NULL, &res);
if (res != CL_SUCCESS) {
printf("Failed to create CNF header buffer\n");
exit(1);
}
cl_mem gpulvars = clCreateBuffer(gs->ctx, CL_MEM_READ_ONLY, 3 * c->cnts[1] * sizeof(cl_uint), NULL, &res);
if (res != CL_SUCCESS) {
printf("Failed to create CNF lvar buffer\n");
exit(1);
}
cl_mem gpuvariables = clCreateBuffer(gs->ctx, CL_MEM_READ_ONLY, c->cnts[2] * sizeof(cl_uint), NULL, &res);
if (res != CL_SUCCESS) {
printf("Failed to create CNF variable buffer\n");
exit(1);
}
cl_mem gpuparities = clCreateBuffer(gs->ctx, CL_MEM_READ_ONLY, c->cnts[2] * sizeof(cl_uchar), NULL, &res);
if (res != CL_SUCCESS) {
printf("Failed to create CNF parity buffer\n");
exit(1);
}
cl_mem gpuoutput = clCreateBuffer(gs->ctx, CL_MEM_READ_WRITE, (wcnt + 1) * sizeof(cl_uint), NULL, &res);
if (res != CL_SUCCESS) {
printf("Failed to create output buffer\n");
exit(1);
}
cl_mem gpuscratchpad = clCreateBuffer(gs->ctx, CL_MEM_READ_WRITE, 2 * wcnt * gs->gpuCUs * sizeof(cl_uint), NULL, &res);
if (res != CL_SUCCESS) {
printf("Failed to create output buffer\n");
exit(1);
}
// Load buffers to GPU
res = clEnqueueWriteBuffer(gs->commqueue, gpuheader, CL_TRUE, 0, 2 * sizeof(cl_uint), c->cnts, 0, NULL, NULL);
if (res != CL_SUCCESS) {
printf("Failed to queue CNF header write\n");
exit(1);
}
res = clEnqueueWriteBuffer(gs->commqueue, gpulvars, CL_TRUE, 0, 3 * c->cnts[1] * sizeof(cl_uint), c->clausedat, 0, NULL, NULL);
if (res != CL_SUCCESS) {
printf("Failed to queue CNF lvar write\n");
exit(1);
}
res = clEnqueueWriteBuffer(gs->commqueue, gpuvariables, CL_TRUE, 0, c->cnts[2] * sizeof(cl_uint), c->variables, 0, NULL, NULL);
if (res != CL_SUCCESS) {
printf("Failed to queue CNF variable write\n");
exit(1);
}
res = clEnqueueWriteBuffer(gs->commqueue, gpuparities, CL_TRUE, 0, c->cnts[2] * sizeof(cl_uchar), c->parities, 0, NULL, NULL);
if (res != CL_SUCCESS) {
printf("Failed to queue CNF parity write\n");
exit(1);
}
res = clEnqueueWriteBuffer(gs->commqueue, gpuoutput, CL_TRUE, 0, (wcnt + 1) * sizeof(cl_uint), solution, 0, NULL, NULL);
if (res != CL_SUCCESS) {
printf("Failed to queue CNF parity write\n");
exit(1);
}
res = clSetKernelArg(gs->kernel, 0, sizeof(cl_mem), (void*) &gpuheader);
res = clSetKernelArg(gs->kernel, 1, sizeof(cl_mem), (void*) &gpulvars);
res = clSetKernelArg(gs->kernel, 2, sizeof(cl_mem), (void*) &gpuvariables);
res = clSetKernelArg(gs->kernel, 3, sizeof(cl_mem), (void*) &gpuparities);
res = clSetKernelArg(gs->kernel, 4, sizeof(cl_mem), (void*) &gpuoutput);
res = clSetKernelArg(gs->kernel, 5, sizeof(cl_mem), (void*) &gpuscratchpad);
size_t deploySize[2] = { gs->gpuCUs, 1 };
res = clEnqueueNDRangeKernel(gs->commqueue, gs->kernel, 1, NULL, &(gs->gpuCUs), &(gs->gpuCUs), 0, NULL, NULL);
if (res != CL_SUCCESS) {
printf("Failed to queue kernel for execution\n");
exit(res);
}
res = clEnqueueReadBuffer(gs->commqueue, gpuoutput, CL_TRUE, 0, (wcnt + 1) * sizeof(cl_uint), solution, 0, NULL, NULL);
if (res != CL_SUCCESS) {
printf("Failed to read kernel output\n");
exit(1);
}
// u64 endtime = utime();
if (solution[0] == 0) {
printf("UNSAT\n");
} else if (solution[0] == 1) {
printf("SAT: ");
for (u32 k = 0; k < c->cnts[0]; ++k) {
u32 vind = (c->cnts[0] - 1) - k;
u32 iind = vind >> 5U;
u32 bind = vind & 0b11111U;
u8 par = (solution[iind + 1] >> bind) & 1U;
printf("%u", par);
}
if (CHECKASGN) {
u8 checkres = 0;
for (u32 i = 0; i < c->cnts[1]; ++i) {
checkres = 0;
for (u32 j = 0; j < c->clausedat[3 * i + 1]; ++j) {
u32 v = c->variables[c->clausedat[3 * i] + j];
u32 vv = c->cnts[0] - 1;
u32 g = (vv - v) >> 5U;
u32 h = (vv - v) & 0b11111U;
u8 paract = (solution[g + 1] >> h) & 1U;
if (c->parities[c->clausedat[3 * i] + j] == paract) {
checkres = 1;
break;
}
}
if (!checkres) break;
}
if (checkres) {
printf(" \xE2\x9C\x93\n");
} else {
printf(" -\n");
}
}
} else {
printf("What the fuck???\n");
solution[0] = 3;
}
res = clReleaseMemObject(gpuheader);
res = clReleaseMemObject(gpulvars);
res = clReleaseMemObject(gpuvariables);
res = clReleaseMemObject(gpuparities);
res = clReleaseMemObject(gpuoutput);
res = clReleaseMemObject(gpuscratchpad);
i32 retval = (i32) solution[0];
free(solution);
return retval;
}
void freeSolver(gpusolver* gs) {
i32 res = 0;
res = clFlush(gs->commqueue);
if (res != CL_SUCCESS) {
printf("Failed to release solver\n");
return;
}
res = clFinish(gs->commqueue);
if (res != CL_SUCCESS) {
printf("Failed to release solver\n");
return;
}
res = clReleaseKernel(gs->kernel);
if (res != CL_SUCCESS) {
printf("Failed to release solver\n");
return;
}
res = clReleaseProgram(gs->program);
if (res != CL_SUCCESS) {
printf("Failed to release solver\n");
return;
}
res = clReleaseCommandQueue(gs->commqueue);
if (res != CL_SUCCESS) {
printf("Failed to release solver\n");
return;
}
res = clReleaseContext(gs->ctx);
if (res != CL_SUCCESS) {
printf("Failed to release solver\n");
return;
}
res = clReleaseDevice(gs->deviceid);
if (res != CL_SUCCESS) {
printf("Failed to release solver\n");
return;
}
free(gs->source_str);
free(gs);
}
i32 gpusolve(cnf* c) { i32 gpusolve(cnf* c) {
cl_platform_id platformid = NULL; cl_platform_id platformid = NULL;
cl_device_id deviceid = NULL; cl_device_id deviceid = NULL;
@@ -45,6 +375,7 @@ i32 gpusolve(cnf* c) {
mpz_export(solution + 1, NULL, -1, sizeof(u32), 0, 0, gmpmax); mpz_export(solution + 1, NULL, -1, sizeof(u32), 0, 0, gmpmax);
mpz_clear(gmpmax); mpz_clear(gmpmax);
// printf("%lu\n", wordcnt);
cl_int res = clGetPlatformIDs(1, &platformid, &numplatforms); cl_int res = clGetPlatformIDs(1, &platformid, &numplatforms);
if (res != CL_SUCCESS) { if (res != CL_SUCCESS) {
printf("Failed to retrieve OpenCL platform IDs\n"); printf("Failed to retrieve OpenCL platform IDs\n");
@@ -58,6 +389,36 @@ i32 gpusolve(cnf* c) {
} }
// printf("Found %u devices\n", numdevices); // printf("Found %u devices\n", numdevices);
u64 memoryMax = 0;
res = clGetDeviceInfo(deviceid, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(cl_ulong), &memoryMax, NULL);
if (res != CL_SUCCESS) {
printf("Failed to query GPU memory\n");
exit(1);
}
u64 localMax = 0;
res = clGetDeviceInfo(deviceid, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(cl_ulong), &localMax, NULL);
if (res != CL_SUCCESS) {
printf("Failed to query GPU memory\n");
exit(1);
}
u64 allocMax = 0;
res = clGetDeviceInfo(deviceid, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(cl_ulong), &allocMax, NULL);
if (res != CL_SUCCESS) {
printf("Failed to query GPU memory\n");
exit(1);
}
printf("GPU mem: %lu %lu %lu\n", memoryMax, localMax, allocMax);
size_t computeUnits = 0;
res = clGetDeviceInfo(deviceid, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(size_t), &computeUnits, NULL);
if (res != CL_SUCCESS) {
printf("Failed to query GPU memory\n");
exit(1);
}
printf("Compute Units: %lu\n", computeUnits);
cl_context context = clCreateContext(NULL, 1, &deviceid, NULL, NULL, &res); cl_context context = clCreateContext(NULL, 1, &deviceid, NULL, NULL, &res);
if (res != CL_SUCCESS) { if (res != CL_SUCCESS) {
printf("Failed to create OpenCL context\n"); printf("Failed to create OpenCL context\n");
@@ -118,6 +479,12 @@ i32 gpusolve(cnf* c) {
exit(1); exit(1);
} }
cl_mem gpuscratchpad = clCreateBuffer(context, CL_MEM_READ_WRITE, 2 * wordcnt * GLOBAL_SIZE * sizeof(cl_uint), NULL, &res);
if (res != CL_SUCCESS) {
printf("Failed to create output buffer\n");
exit(1);
}
// Load buffers to GPU // Load buffers to GPU
res = clEnqueueWriteBuffer(commqueue, gpuheader, CL_TRUE, 0, 2 * sizeof(cl_uint), c->cnts, 0, NULL, NULL); res = clEnqueueWriteBuffer(commqueue, gpuheader, CL_TRUE, 0, 2 * sizeof(cl_uint), c->cnts, 0, NULL, NULL);
if (res != CL_SUCCESS) { if (res != CL_SUCCESS) {
@@ -176,7 +543,7 @@ i32 gpusolve(cnf* c) {
size_t maxworkgrpu = 0; size_t maxworkgrpu = 0;
res = clGetKernelWorkGroupInfo(kernel, deviceid, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &maxworkgrpu, NULL); res = clGetKernelWorkGroupInfo(kernel, deviceid, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &maxworkgrpu, NULL);
// printf("Max work group size: %lu\n", maxworkgrpu); printf("Max work group size: %lu\n", maxworkgrpu);
res = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void*) &gpuheader); res = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void*) &gpuheader);
res = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void*) &gpulvars); res = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void*) &gpulvars);
@@ -185,7 +552,7 @@ i32 gpusolve(cnf* c) {
res = clSetKernelArg(kernel, 4, sizeof(cl_mem), (void*) &gpuoutput); res = clSetKernelArg(kernel, 4, sizeof(cl_mem), (void*) &gpuoutput);
res = clSetKernelArg(kernel, 5, 2 * wordcnt * sizeof(cl_uint) * LOCAL_SIZE, NULL); res = clSetKernelArg(kernel, 5, sizeof(cl_mem), (void*) &gpuscratchpad);
// u64 starttime = utime(); // u64 starttime = utime();
size_t itemsize[2] = {GLOBAL_SIZE, LOCAL_SIZE }; size_t itemsize[2] = {GLOBAL_SIZE, LOCAL_SIZE };

View File

@@ -1,4 +1,27 @@
#pragma once #pragma once
#include "ncnf.h" #include "ncnf.h"
#include <CL/cl.h>
typedef struct {
cl_platform_id platformid;
cl_device_id deviceid;
cl_uint numdevices;
cl_uint numplatforms;
char* source_str;
size_t source_size;
cl_context ctx;
cl_command_queue commqueue;
cl_program program;
cl_kernel kernel;
u64 gpuMemoryMax;
u64 gpuLocalMax;
u64 gpuAllocMax;
u64 gpuCUs;
} gpusolver;
gpusolver* initSolver();
i32 gpusolve2(gpusolver* gs, cnf* c);
void freeSolver(gpusolver* gs);
i32 gpusolve(cnf* c); i32 gpusolve(cnf* c);

16
main.c
View File

@@ -250,17 +250,27 @@ int main() {
/* /*
cnf* c = readDIMACS("/home/lev/Downloads/logistics/logistics.d.cnf"); cnf* c = readDIMACS("/home/lev/Downloads/uf20/uf20-03.cnf");
sortlastnum(c); sortlastnum(c);
printf("%u\n", c->cnts[0]); // gpusolve(c);
gpusolve(c);
gpusolver* gs = initSolver();
if (gs == NULL) return -1;
i32 res = gpusolve2(gs, c);
freeSolver(gs);
freecnf(c); freecnf(c);
return 0; return 0;
*/ */
runTests(); runTests();
return 0; return 0;
/* /*

12
psat.cl
View File

@@ -61,7 +61,7 @@ void mul(uint* c, uint len, uint* a, uint b) {
} }
} }
__kernel void vectorSAT(__global const uint* cnfhdr, __global const uint* clausedat, __global const uint* vars, __global const uchar* pars, __global uint* output, __local uint* lctrs) { __kernel void vectorSAT(__global const uint* cnfhdr, __global const uint* clausedat, __global const uint* vars, __global const uchar* pars, __global uint* output, __global uint* lctrs) {
uint locid = get_local_id(0); uint locid = get_local_id(0);
uint locsz = get_local_size(0); uint locsz = get_local_size(0);
// uint grpid = get_group_id(0); // uint grpid = get_group_id(0);
@@ -76,21 +76,23 @@ __kernel void vectorSAT(__global const uint* cnfhdr, __global const uint* clause
uint addval = 0; uint addval = 0;
if (globid == 0) output[0] = 0;
uint* ctr = lctrs + wcnt * 2 * locid; output[0] = 0;
uint* max = lctrs + wcnt * (2 * locid + 1);
uint* ctr = lctrs + wcnt * 2 * globid;
uint* max = lctrs + wcnt * (2 * globid + 1);
for (uint i = 0; i < wcnt; ++i) { for (uint i = 0; i < wcnt; ++i) {
ctr[i] = max[i] = 0; ctr[i] = max[i] = 0;
} }
mul(ctr, wcnt, output + 1, globid); mul(ctr, wcnt, output + 1, globid);
if (globid == globsz) { if (globid == globsz - 1) {
stateaddpow(wcnt, max, cnfhdr[0]); stateaddpow(wcnt, max, cnfhdr[0]);
} else { } else {
mul(max, wcnt, output + 1, globid + 1); mul(max, wcnt, output + 1, globid + 1);
} }
// printf("%u %u\n", ctr[0], max[0]);
barrier(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE);

View File

@@ -24,6 +24,7 @@ i32 runTests() {
i32 runuf20() { i32 runuf20() {
gpusolver* gs = initSolver();
// printf("Running against uf20\n"); // printf("Running against uf20\n");
u32 passed = 0; u32 passed = 0;
u64 tottime = 0; u64 tottime = 0;
@@ -36,7 +37,7 @@ i32 runuf20() {
sortlastnum(c); sortlastnum(c);
u64 start = utime(); u64 start = utime();
i32 res = gpusolve(c); i32 res = gpusolve2(gs, c);
u64 stop = utime(); u64 stop = utime();
tottime += (stop - start); tottime += (stop - start);
@@ -46,10 +47,12 @@ i32 runuf20() {
// printf("Passed %u / 1000 tests\n", passed); // printf("Passed %u / 1000 tests\n", passed);
// printf("Took %lf s total, %lf s on avg\n", ((f64) tottime) / 1000000.0, ((f64) tottime) / 1000000000.0); // printf("Took %lf s total, %lf s on avg\n", ((f64) tottime) / 1000000.0, ((f64) tottime) / 1000000000.0);
if (passed == 1000) return 0; if (passed == 1000) return 0;
freeSolver(gs);
return 1; return 1;
} }
i32 runuf50() { i32 runuf50() {
gpusolver* gs = initSolver();
// printf("Running against uf50\n"); // printf("Running against uf50\n");
u32 passed = 0; u32 passed = 0;
u64 tottime = 0; u64 tottime = 0;
@@ -62,7 +65,7 @@ i32 runuf50() {
sortlastnum(c); sortlastnum(c);
u64 start = utime(); u64 start = utime();
i32 res = gpusolve(c); i32 res = gpusolve2(gs, c);
u64 stop = utime(); u64 stop = utime();
tottime += (stop - start); tottime += (stop - start);
@@ -72,10 +75,12 @@ i32 runuf50() {
// printf("Passed %u / 1000 tests\n", passed); // printf("Passed %u / 1000 tests\n", passed);
// printf("Took %lf s total, %lf s on avg\n", ((f64) tottime) / 1000000.0, ((f64) tottime) / 1000000000.0); // printf("Took %lf s total, %lf s on avg\n", ((f64) tottime) / 1000000.0, ((f64) tottime) / 1000000000.0);
if (passed == 1000) return 0; if (passed == 1000) return 0;
freeSolver(gs);
return 1; return 1;
} }
i32 runuuf50() { i32 runuuf50() {
gpusolver* gs = initSolver();
// printf("Running against uuf50\n"); // printf("Running against uuf50\n");
u32 passed = 0; u32 passed = 0;
u64 tottime = 0; u64 tottime = 0;
@@ -88,7 +93,7 @@ i32 runuuf50() {
sortlastnum(c); sortlastnum(c);
u64 start = utime(); u64 start = utime();
i32 res = gpusolve(c); i32 res = gpusolve2(gs, c);
u64 stop = utime(); u64 stop = utime();
tottime += (stop - start); tottime += (stop - start);
@@ -98,5 +103,6 @@ i32 runuuf50() {
// printf("Passed %u / 1000 tests\n", passed); // printf("Passed %u / 1000 tests\n", passed);
// printf("Took %lf s total, %lf s on avg\n", ((f64) tottime) / 1000000.0, ((f64) tottime) / 1000000000.0); // printf("Took %lf s total, %lf s on avg\n", ((f64) tottime) / 1000000.0, ((f64) tottime) / 1000000000.0);
if (passed == 1000) return 0; if (passed == 1000) return 0;
freeSolver(gs);
return 1; return 1;
} }