Hi,
I'm trying to write a MEX gateway function (in CUDA) to add two complex integer arrays given by Matlab. Currently, the following code works fine for 2 complex float arrays. Could you please let me know how should i change the code to be able to read complex integer from Matlab? it should be about the way i define prhs!!
#include <cuda_runtime.h>#include "device_launch_parameters.h"#include <stdio.h>#include "cuda.h"#include <iostream>#include <mex.h>#include "gpu/mxGPUArray.h"#include "matrix.h"#include <thrust/complex.h>#include <string.h>//#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }////inline void gpuAssert(cudaError_t code, const char* file, int line, bool abort = true)//{// if (code != cudaSuccess)// {// fprintf(stderr, "GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);// if (abort) exit(code);// }//}//typedef thrust::complex<float> fcomp;__device__ void atAddComplex(fcomp* a, fcomp b) { float* x = (float*)a; /* cast x pointer to the real part */ float* y = x + 1; /* cast the y pointer to the following mem. address (imaginary part) */ //use atomicAdd for double variables atomicAdd(x, b.real()); atomicAdd(y, b.imag());}__global__ void add(fcomp * Device_DataRes, fcomp * Device_Data1, fcomp * Device_Data2, int N) { int TID = threadIdx.y * blockDim.x + threadIdx.x; int BlockOFFset = blockDim.x * blockDim.y * blockIdx.x; int GID_RowBased = BlockOFFset + TID; if (GID_RowBased < N) { //Device_DataRes[GID_RowBased] = Device_Data1[GID_RowBased] + Device_Data2[GID_RowBased]; //Device_Data1[GID_RowBased] = Device_Data1[GID_RowBased] + Device_Data2[GID_RowBased]; atAddComplex(&Device_Data1[GID_RowBased], Device_Data2[GID_RowBased]); // atomicAdd(&Device_Data1[GID_RowBased], Device_Data2[GID_RowBased]); }}void mexFunction(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[]) { mxInitGPU(); int N = 1000; int ArrayByteSize = sizeof(fcomp) * N; fcomp* Device_Data1; fcomp* Device_Data2; fcomp* DataRes; fcomp* Device_DataRes; mxComplexSingle* Data1 = mxGetComplexSingles(prhs[0]); mxComplexSingle* Data2 = mxGetComplexSingles(prhs[1]); (cudaMalloc((void**)&Device_Data1, ArrayByteSize)); (cudaMemcpy(Device_Data1, Data1, ArrayByteSize, cud SoaMemcpyHostToDevice)); (cudaMalloc((void**)&Device_Data2, ArrayByteSize)); (cudaMemcpy(Device_Data2, Data2, ArrayByteSize, cudaMemcpyHostToDevice)); plhs[0] = mxCreateNumericMatrix(N, 1, mxSINGLE_CLASS, mxCOMPLEX); DataRes = static_cast<fcomp*> (mxGetData(plhs[0])); (cudaMalloc((void**)&Device_DataRes, ArrayByteSize)); dim3 block(1024); int GridX = (N / block.x + 1); dim3 grid(GridX);//SystemSetup.NumberOfTransmitter add << <grid, block >> > (Device_DataRes, Device_Data1, Device_Data2, N); (cudaMemcpy(DataRes, Device_Data1, ArrayByteSize, cudaMemcpyDeviceToHost)); cudaFree(Device_Data1); cudaFree(Device_Data2); cudaFree(Device_DataRes); //mxGPUDestroyGPUArray(MediumX);}
Best Answer