Commit c3b23fcd authored by Pierre Paleo
Browse files

Cuda: update ElementOp.cu with generic elementwise 2D operation

parent 350a2bef
......@@ -2,6 +2,18 @@
// Single-precision complex type used by the complex kernels in this file.
typedef pycuda::complex<float> complex;
// Operation codes that GENERIC_OP can be set to.
#define OP_ADD 0
#define OP_SUB 1
#define OP_MUL 2
#define OP_DIV 3
// GENERIC_OP selects, at compile time, the operation applied by
// inplace_generic_op_2Dby2D. If it is not already defined when this point is
// reached, it defaults to OP_ADD.
#ifndef GENERIC_OP
#define GENERIC_OP OP_ADD
#endif
// arr2D *= arr1D (line by line, i.e. along fast dim)
__global__ void inplace_complex_mul_2Dby1D(complex* arr2D, complex* arr1D, int width, int height) {
int x = blockDim.x * blockIdx.x + threadIdx.x;
......@@ -17,6 +29,23 @@ __global__ void inplace_complex_mul_2Dby1D(complex* arr2D, complex* arr1D, int w
arr2D[i]._M_im = a._M_im * b._M_re + a._M_re * b._M_im;
}
// arr2D = arr2D <op> arr2D_other, element by element, where <op> is selected
// at compile time through GENERIC_OP (OP_ADD, OP_SUB, OP_MUL or OP_DIV).
//
// Parameters:
//   arr2D        array updated in place
//   arr2D_other  second operand, read at the same flat index as arr2D
//   width        number of elements per row (fast dimension)
//   height       number of rows
//
// Element (x, y) is addressed at flat index y*width + x in both arrays.
// Each thread processes at most one element; threads whose (x, y) falls
// outside width x height return immediately, so the launch grid may be
// larger than the array. OP_DIV does not check for zero divisors.
__global__ void inplace_generic_op_2Dby2D(float* arr2D, float* arr2D_other, int width, int height) {
    int x = blockDim.x * blockIdx.x + threadIdx.x;
    int y = blockDim.y * blockIdx.y + threadIdx.y;
    if ((x >= width) || (y >= height)) return;
    // Build the flat index in size_t: y*width evaluated in int overflows
    // as soon as the array holds more than INT_MAX elements.
    size_t i = (size_t) y * (size_t) width + (size_t) x;
#if GENERIC_OP == OP_ADD
    arr2D[i] += arr2D_other[i];
#elif GENERIC_OP == OP_SUB
    arr2D[i] -= arr2D_other[i];
#elif GENERIC_OP == OP_MUL
    arr2D[i] *= arr2D_other[i];
#elif GENERIC_OP == OP_DIV
    arr2D[i] /= arr2D_other[i];
#else
    // An unknown operation code would otherwise compile to a kernel that
    // silently leaves arr2D untouched.
#error "GENERIC_OP must be one of OP_ADD, OP_SUB, OP_MUL, OP_DIV"
#endif
}
// arr3D *= arr1D (along fast dim)
__global__ void inplace_complex_mul_3Dby1D(complex* arr3D, complex* arr1D, int width, int height, int depth) {
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment