// array multiplication on the device: C = A * B
//
// Element-wise product: each thread handles at most one element and writes
// C[gid] = A[gid] * B[gid], where gid is the thread's flat global index.
//
// Launch layout: only the x components of blockIdx/blockDim/threadIdx are
// used, so a 1D grid of 1D blocks is expected. The launch must supply at
// least DATASET_SIZE threads in total for every element to be written;
// surplus threads fall through the bounds check and do nothing.
//
// Parameters:
//   A, B  inputs, read at index gid
//   C     output, written at index gid
// NOTE(review): all three pointers are presumably device-accessible buffers
// holding at least DATASET_SIZE floats -- confirm against the caller.
// DATASET_SIZE is not defined in this block; it must be visible at this point.
__global__ void ArrayMul( float *A, float *B, float *C )
{
	// Do the index arithmetic in 64 bits: blockIdx.x * blockDim.x is otherwise
	// a 32-bit unsigned product, and narrowing it to int can yield a negative
	// index that still passes the bounds check on very large launches.
	const long long gid =
		static_cast<long long>( blockIdx.x ) * blockDim.x + threadIdx.x;

	// The grid rarely divides the data set evenly; skip the tail threads.
	if( gid < DATASET_SIZE )
	{
		C[gid] = A[gid] * B[gid];
	}
}