10.1
Main Application
/*
 * Scale a vector in place on the CPU.
 *
 * val    : array holding at least n floats; each of the first n is overwritten
 *          with its value multiplied by factor
 * n      : number of elements to scale (nothing is touched when n is 0)
 * factor : multiplier applied to every element
 */
void vector_scal_cpu(float *val, unsigned n, float factor)
{
    float *cursor = val;
    unsigned remaining = n;

    /* Walk the array front to back, one element per iteration. */
    while (remaining > 0)
    {
        *cursor = *cursor * factor;
        cursor++;
        remaining--;
    }
}
/* Number of float elements in the vector that main() allocates, fills and scales. */
#define NX 2048
/*
 * Allocate a vector of NX floats, set every element to 1.0, scale it with
 * vector_scal_cpu() and print the first element before and after the scaling.
 *
 * Returns 0 on success, 1 if the vector could not be allocated.
 */
int main(void)
{
    float *vector;
    unsigned i;

    /* The explicit cast keeps the line valid when the file is built as C++/CUDA. */
    vector = (float *)malloc(sizeof(vector[0]) * NX);
    if (vector == NULL)
    {
        /* Without this check the initialisation loop below would write through NULL. */
        fprintf(stderr, "Could not allocate a vector of %u elements\n", (unsigned)NX);
        return 1;
    }

    for (i = 0; i < NX; i++)
        vector[i] = 1.0f;

    fprintf(stderr, "BEFORE : First element was %f\n", vector[0]);

    /* Float literal: avoids an implicit double-to-float conversion. */
    float factor = 3.14f;
    vector_scal_cpu(vector, NX, factor);

    fprintf(stderr, "AFTER First element is %f\n", vector[0]);

    free(vector);
    return 0;
}
10.2
CPU Kernel
10.3
CUDA Kernel
/*
 * CUDA kernel: each thread computes the index
 * blockIdx.x*blockDim.x + threadIdx.x and, when that index is below n,
 * multiplies val[index] by factor in place. Threads whose index is n or
 * larger do nothing.
 *
 * n      : number of elements to scale
 * val    : array of floats to scale (dereferenced on the device)
 * factor : multiplier applied to each element
 */
static __global__ void vector_mult_cuda(unsigned n, float *val, float factor)
{
    const unsigned block_start = blockIdx.x * blockDim.x;
    const unsigned idx = block_start + threadIdx.x;

    /* Surplus threads of the last block have nothing to do. */
    if (idx >= n)
        return;

    val[idx] = val[idx] * factor;
}
/*
 * CUDA entry point for the vector scaling example: launches vector_mult_cuda
 * over the vector described by buffers[0].
 *
 * buffers : buffers[0] is read with STARPU_VECTOR_GET_NX and
 *           STARPU_VECTOR_GET_PTR (presumably a StarPU vector interface
 *           whose pointer is valid on the device -- confirm against the
 *           codelet that registers this function)
 * _args   : points to the float scaling factor
 *
 * Launch errors are reported through STARPU_CUDA_REPORT_ERROR.
 */
extern "C" void scal_cuda_func(void *buffers[], void *_args)
{
    float *factor = (float *)_args;

    /* length of the vector */
    unsigned n = STARPU_VECTOR_GET_NX(buffers[0]);
    /* local copy of the vector pointer */
    float *val = (float *)STARPU_VECTOR_GET_PTR(buffers[0]);

    /* A grid of zero blocks is an invalid launch configuration; nothing to scale anyway. */
    if (n == 0)
        return;

    unsigned threads_per_block = 64;
    /* Round up so that a partially filled last block covers the tail of the vector. */
    unsigned nblocks = (n + threads_per_block-1) / threads_per_block;

    /* No dynamic shared memory; the kernel is queued on the stream StarPU hands out. */
    vector_mult_cuda<<<nblocks, threads_per_block, 0, starpu_cuda_get_local_stream()>>>(n, val, *factor);

    /* A kernel launch returns nothing itself: launch errors are fetched explicitly. */
    cudaError_t status = cudaGetLastError();
    if (status != cudaSuccess) STARPU_CUDA_REPORT_ERROR(status);
}
cudaStream_t starpu_cuda_get_local_stream(void)
#define STARPU_CUDA_REPORT_ERROR(status)
Definition starpu_cuda.h:60
#define STARPU_VECTOR_GET_NX(interface)
Definition starpu_data_interfaces.h:2100
#define STARPU_VECTOR_GET_PTR(interface)
Definition starpu_data_interfaces.h:2084
10.4
OpenCL Kernel
10.4.1
Invoking the Kernel
void scal_opencl_func(void *buffers[], void *_args)
{
float *factor = _args;
int id, devid, err;
cl_kernel kernel;
cl_command_queue queue;
cl_event event;
{
"vector_mult_opencl",
devid);
err = clSetKernelArg(kernel, 0, sizeof(n), &n);
err |= clSetKernelArg(kernel, 1, sizeof(val), &val);
err |= clSetKernelArg(kernel, 2, sizeof(*factor), factor);
}
{
size_t global=n;
size_t local;
size_t s;
cl_device_id device;
err = clGetKernelWorkGroupInfo (kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, &s);
if (local > global) local=global;
else global = (global + local-1) / local * local;
err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, &event);
}
{
clFinish(queue);
clReleaseEvent(event);
}
}
#define STARPU_VECTOR_GET_DEV_HANDLE(interface)
Definition starpu_data_interfaces.h:2090
cl_program programs[STARPU_MAXOPENCLDEVS]
Definition starpu_opencl.h:48
int starpu_opencl_load_kernel(cl_kernel *kernel, cl_command_queue *queue, struct starpu_opencl_program *opencl_programs, const char *kernel_name, int devid)
#define STARPU_OPENCL_REPORT_ERROR(status)
Definition starpu_opencl.h:276
int starpu_opencl_release_kernel(cl_kernel kernel)
void starpu_opencl_get_device(int devid, cl_device_id *device)
int starpu_opencl_collect_stats(cl_event event)
Definition starpu_opencl.h:46
int starpu_worker_get_devid(int id)
int starpu_worker_get_id(void)
10.4.2
Source of the Kernel
/*
 * OpenCL kernel: each work-item takes its global id in dimension 0 and, when
 * that id is below nx, multiplies val[id] by factor in place. Work-items whose
 * id is nx or larger do nothing.
 *
 * nx     : number of elements to scale
 * val    : global-memory array of floats to scale
 * factor : multiplier applied to each element
 */
__kernel void vector_mult_opencl(int nx, __global float* val, float factor)
{
    const int gid = get_global_id(0);

    /* Work-items added by rounding the global size up have nothing to do. */
    if (gid >= nx)
        return;

    val[gid] = val[gid] * factor;
}