MATLAB: Mex file c. Getting different results from matlab

cMATLABmemorymex

Hi all
I have recently started to use Mex n c,
I have created a mex file using the auotgenrated function in MATLAB, however I started writing mine to make it more efficient as my code still expensive.
For some reason I do get a discrepancy between my c code and MATLAB function which can reach 20%. I do not think I have abog in the code. I do think is something to do with memory allocation and index handling It could be a bug also. I would appreciate any help. Thanks a lot.
I will add the matlab code and the c code.
========================== % .m code
function [Vxs,Vys,Vzs] = Induced_Velocity_Tret_double(X0,Y0,Z0 ,X,Y,Z,Gamma, dt)
% allocate meomery
[Nc,Mc] = size(X0);
[Ns,Ms] = size(X);
Vxs = zeros(Nc,Mc); Vys = zeros(Nc,Mc); Vzs = zeros(Nc,Mc);
cnst1 = 4 .* pi;
%% Correction terms
alpha = 1.25643;
visc = 1.802e-5; % static ari viscuicty at 15 deg
r02 = 0.0001;
a1 = 2e-4;
%% start calcauting induced veocity
for i0 = 1:Nc
for j0 = 1:Mc
for i = 1:Ns-1
t = double(i) .* dt ;
for j = 1:Ms-1
cnst2 = Gamma(i,j)./ cnst1;
% correction terms
delta = 1 + a1 .* sqrt(Gamma(i,j).^2./visc);
rc = sqrt(r02+(4.*alpha.*t.*visc.*delta));
%—— line 1
[Vx,Vy,Vz]=Vind_LINE(X0(i0,j0),Y0(i0,j0),Z0(i0,j0), X(i,j), Y(i,j), Z(i,j), X(i,j+1), Y(i,j+1), Z(i,j+1), cnst2, rc);
Vxs(i0,j0)=Vxs(i0,j0)+Vx; Vys(i0,j0)=Vys(i0,j0)+Vy; Vzs(i0,j0)=Vzs(i0,j0)+Vz;
%—— line 2
[Vx,Vy,Vz]=Vind_LINE(X0(i0,j0),Y0(i0,j0),Z0(i0,j0), X(i,j+1), Y(i,j+1), Z(i,j+1), X(i+1,j+1),Y(i+1,j+1),Z(i+1,j+1), cnst2, rc);
Vxs(i0,j0)=Vxs(i0,j0)+Vx; Vys(i0,j0)=Vys(i0,j0)+Vy; Vzs(i0,j0)=Vzs(i0,j0)+Vz;
%—— line 3
[Vx,Vy,Vz]=Vind_LINE(X0(i0,j0),Y0(i0,j0),Z0(i0,j0), X(i+1,j+1),Y(i+1,j+1),Z(i+1,j+1),X(i+1,j), Y(i+1,j), Z(i+1,j), cnst2, rc);
Vxs(i0,j0)=Vxs(i0,j0)+Vx; Vys(i0,j0)=Vys(i0,j0)+Vy; Vzs(i0,j0)=Vzs(i0,j0)+Vz;
%—— line4
[Vx,Vy,Vz]=Vind_LINE(X0(i0,j0),Y0(i0,j0),Z0(i0,j0), X(i+1,j), Y(i+1,j), Z(i+1,j), X(i,j), Y(i,j), Z(i,j), cnst2, rc);
Vxs(i0,j0)=Vxs(i0,j0)+Vx; Vys(i0,j0)=Vys(i0,j0)+Vy; Vzs(i0,j0)=Vzs(i0,j0)+Vz;
end
end
end
end
end
function [Vx,Vy,Vz] = Vind_LINE(x0,y0,z0,x1,y1,z1,x2,y2,z2,cnst, rc)
tol = 1e-8;
Vx = 0.;Vy = 0.;Vz = 0.;
rx1 = x0-x1; ry1 = y0-y1; rz1 = z0-z1;
rx2 = x0-x2; ry2 = y0-y2; rz2 = z0-z2;
crsr1r2x = (ry1 .* rz2) – (rz1 .* ry2);
crsr1r2y = (rz1 .* rx2) – (rx1 .* rz2);
crsr1r2z = (rx1 .* ry2) – (ry1 .* rx2);
magcrsr1r2 = (crsr1r2x.^2 + crsr1r2y.^2 + crsr1r2z.^2);
if magcrsr1r2 > tol
Frc1x = crsr1r2x ./ magcrsr1r2;
Frc1y = crsr1r2y ./ magcrsr1r2;
Frc1z = crsr1r2z ./ magcrsr1r2;
magr1 = sqrt(rx1.^2 + ry1.^2 + rz1.^2);
magr2 = sqrt(rx2.^2 + ry2.^2 + rz2.^2);
r1minusr2x = rx1-rx2;
r1minusr2y = ry1-ry2;
r1minusr2z = rz1-rz2;
Frc2 = r1minusr2x.* ((rx1./magr1)-(rx2./magr2)) + r1minusr2y.*((ry1./magr1)-(ry2./magr2)) + r1minusr2z.*((rz1./magr1)-(rz2./magr2)) ;
%% correction term
magr0 = sqrt(r1minusr2x.^2 + r1minusr2y.^2 + r1minusr2z.^2);
h = magcrsr1r2 ./ magr0;
k = h.^2./(rc.^2 +h.^2);
%%
Vx = cnst .* Frc1x .* Frc2 .* k;
Vy = cnst .* Frc1y .* Frc2 .* k;
Vz = cnst .* Frc1z .* Frc2 .* k;
end
end
========================== % .c mex code
#include <stdio.h>
#include <math.h>
#include "mex.h"
#include "matrix.h"
#include <omp.h>
static void Vind_LINE(double x0, double b_y0, double z0, double x1, double b_y1,
double z1, double x2, double y2, double z2, double cnst,
double rc, double *Vx, double *Vy, double *Vz)
{
double rx1; double ry1; double rz1; double rx2; double ry2; double rz2;
double crsr1r2x; double crsr1r2y; double crsr1r2z; double magcrsr1r2;
double magr1; double magr2; double r1minusr2x; double r1minusr2y; double r1minusr2z;
*Vx = 0.0;
*Vy = 0.0;
*Vz = 0.0;
/* Bounde vortex */
/* r0x = x2-x1; r0y = y2-y1; r0z = z2-z1; */
rx1 = x0 – x1;
ry1 = b_y0 – b_y1;
rz1 = z0 – z1;
rx2 = x0 – x2;
ry2 = b_y0 – y2;
rz2 = z0 – z2;
crsr1r2x = ry1 * rz2 – rz1 * ry2;
crsr1r2y = rz1 * rx2 – rx1 * rz2;
crsr1r2z = rx1 * ry2 – ry1 * rx2;
magcrsr1r2 = (crsr1r2x * crsr1r2x + crsr1r2y * crsr1r2y) + crsr1r2z * crsr1r2z;
if (magcrsr1r2 > 1.0E-8) {
magr1 = sqrt((rx1 * rx1 + ry1 * ry1) + rz1 * rz1);
magr2 = sqrt((rx2 * rx2 + ry2 * ry2) + rz2 * rz2);
r1minusr2x = rx1 – rx2;
r1minusr2y = ry1 – ry2;
r1minusr2z = rz1 – rz2;
magr2 = (r1minusr2x * (rx1 / magr1 – rx2 / magr2) + r1minusr2y * (ry1 /
magr1 – ry2 / magr2)) + r1minusr2z * (rz1 / magr1 – rz2 / magr2);
/* %% correction term */
magr1 = magcrsr1r2 / sqrt((r1minusr2x * r1minusr2x + r1minusr2y * r1minusr2y)
+ r1minusr2z * r1minusr2z);
magr1 *= magr1;
magr1 /= rc * rc + magr1;
/* %% */
*Vx = cnst * (crsr1r2x / magcrsr1r2) * magr2 * magr1;
*Vy = cnst * (crsr1r2y / magcrsr1r2) * magr2 * magr1;
*Vz = cnst * (crsr1r2z / magcrsr1r2) * magr2 * magr1;
}
}
/*
void mexFunction(int nlhs, mxArray *plhs[],
int nrhs, const mxArray *prhs[])
{
double *X0, *Y0, *Z0, *X, *Y, *Z, *Gamma;
double dt;
int Nc, Mc , Ns, Ms, NGamma, Nss, NV;
X0 = mxGetDoubles (prhs[0]);
Nc = mxGetM(prhs[0]);
Mc = mxGetN(prhs[0]);
Y0 = mxGetDoubles (prhs[1]);
Z0 = mxGetDoubles (prhs[2]);
X = mxGetDoubles (prhs[3]);
Ns = mxGetM(prhs[3]);
Ms = mxGetN(prhs[3]);
Y = mxGetDoubles (prhs[4]);
Z = mxGetDoubles (prhs[5]);
Gamma = mxGetDoubles (prhs[6]);
NGamma = mxGetM(prhs[6]);
dt = mxGetScalar (prhs[7]);
/* OUTPUTS */
plhs[0] = mxCreateDoubleMatrix(Nc, Mc, mxREAL);
plhs[1] = mxCreateDoubleMatrix(Nc, Mc, mxREAL);
plhs[2] = mxCreateDoubleMatrix(Nc, Mc, mxREAL);
//double Vxs[Nc][Mc], Vys[Nc][Mc], Vzs[Nc][Mc];
// double *Vxs, *Vys, *Vzs;
double* Vxs = (double*) mxGetDoubles(plhs[0]);
double* Vys = (double*) mxGetDoubles(plhs[1]);
double* Vzs = (double*) mxGetDoubles(plhs[2]);
int i0; int loop_ub; int i1; int b_j0; int i2; int i; double t; int i3; int j;
double cnst2; double rc; double Vx; double Vy; double Vz;
for (loop_ub = 0; loop_ub < Nc; loop_ub++) {
for (b_j0 = 0; b_j0 < Mc; b_j0++) {
for (i = 0; i <= Ns – 2; i++) {
i3 = Ms;
for (j = 0; j <= i3 – 2; j++) {
cnst2 = Gamma[i + NGamma * j] / 12.566370614359172;
/* correction terms */
rc = Gamma[i + NGamma * j] ;
rc = sqrt(0.0001 + 5.02572 * t * 1.802E-5 * (1.0 + 0.0002 * sqrt(rc *
rc / 1.802E-5)));
Vind_LINE(X0[loop_ub + Nc * b_j0], Y0[loop_ub + Nc * b_j0], Z0[loop_ub + Nc * b_j0],
X[i + Ns * j], Y[i + Ns * j], Z[i + Ns * j],
X[i + Ns * (j + 1)], Y[i + Ns * (j + 1)], Z[i +Ns * (j + 1)],
cnst2, rc, &Vx, &Vy, &Vz);
Vxs[loop_ub + Nc * b_j0] += Vx;
Vys[loop_ub + Nc * b_j0] += Vy;
Vzs[loop_ub + Nc * b_j0] += Vz;
Vind_LINE(X0[loop_ub + Nc * b_j0], Y0[loop_ub + Nc * b_j0], Z0[loop_ub + Nc * b_j0],
X[i + Ns * (j + 1)], Y[i + Ns *(j + 1)], Z[i + Ns * (j + 1)],
X[(i + Ns * (j + 1)) + 1], Y[(i + Ns * (j + 1)) + 1], Z[(i + Ns * (j + 1)) + 1],
cnst2, rc, &Vx, &Vy, &Vz);
Vxs[loop_ub + Nc * b_j0] += Vx;
Vys[loop_ub + Nc * b_j0] += Vy;
Vzs[loop_ub + Nc * b_j0] += Vz;
Vind_LINE(X0[loop_ub + Nc * b_j0], Y0[loop_ub + Nc * b_j0], Z0[loop_ub + Nc * b_j0],
X[(i + Ns * (j + 1)) + 1], Y[(i + Ns * (j + 1)) + 1], Z[(i + Ns * (j +1)) + 1],
X[(i + Ns * j) + 1], Y[(i + Ns * j) + 1], Z[(i + Ns * j) + 1],
cnst2, rc, &Vx, &Vy, &Vz);
Vxs[loop_ub + Nc * b_j0] += Vx;
Vys[loop_ub + Nc * b_j0] += Vy;
Vzs[loop_ub + Nc * b_j0] += Vz;
Vind_LINE(X0[loop_ub + Nc * b_j0], Y0[loop_ub + Nc * b_j0], Z0[loop_ub + Nc * b_j0],
X[(i +Ns * j) + 1], Y[(i+ Ns * j) + 1], Z[(i + Ns * j) + 1],
X[i + Ns * j], Y[i + Ns * j], Z[i + Ns * j],
cnst2, rc, &Vx, &Vy, &Vz);
Vxs[loop_ub + Nc * b_j0] += Vx;
Vys[loop_ub + Nc * b_j0] += Vy;
Vzs[loop_ub + Nc * b_j0] += Vz;
}
}
}
}
mxFree(Vxs);
mxFree(Vys);
mxFree(Vzs);
}
/*
* File trailer for Induced_Velocity_Tret3.c
*
* [EOF]
*/

Best Answer

MATLAB stores matrices in column order. E.g., if A is a 2x3 matrix, and p is a pointer to the data area of A in a mex routine, then MATLAB will store the elements in memory as follows:
p[0] = A(1,1)
p[1] = A(2,1)
p[2] = A(1,2)
p[3] = A(2,2)
p[4] = A(1,3)
p[5] = A(2,3)
If real double full matrix A had generic dimensions of MxN, then accessing the elements linearly would be as follows with appropriate variable definitions:
M = mxGetM(prhs[0]);
N = mxGetN(prhs[0]);
p = mxGetDoubles(prhs[0]);
k = 0;
for( j=0; j<N; j++ ) {
for( i=0; i<M; i++ ) {
mexPrintf("p[%d] = A(%d,%d) = %g\n",k++,i+1,j+1,p[i+M*j]);
}
}
You can use OpenMP in mex routines, but you cannot do anything that invokes the MATLAB Memory Manager inside your parallel threads. So calling inspection routines such as the following would be OK inside these threads:
mxGetM
mxGetN
mxGetNumberOfDimensions
etc.
But doing any allocation/deallocation would not be OK inside the parallel threads:
mxCreateDoubleMatrix
mxDestroyArray
etc.
You must do all MATLAB API allocation/deallocation outside the parallel threads.