Hi all
I have recently started to use Mex n c,
I have created a mex file using the auotgenrated function in MATLAB, however I started writing mine to make it more efficient as my code still expensive.
For some reason I do get a discrepancy between my c code and MATLAB function which can reach 20%. I do not think I have abog in the code. I do think is something to do with memory allocation and index handling It could be a bug also. I would appreciate any help. Thanks a lot.
I will add the matlab code and the c code.
========================== % .m code
function [Vxs,Vys,Vzs] = Induced_Velocity_Tret_double(X0,Y0,Z0 ,X,Y,Z,Gamma, dt)
% allocate meomery
[Nc,Mc] = size(X0);
[Ns,Ms] = size(X);
Vxs = zeros(Nc,Mc); Vys = zeros(Nc,Mc); Vzs = zeros(Nc,Mc);
cnst1 = 4 .* pi;
%% Correction terms
alpha = 1.25643;
visc = 1.802e-5; % static ari viscuicty at 15 deg
r02 = 0.0001;
a1 = 2e-4;
%% start calcauting induced veocity
for i0 = 1:Nc
for j0 = 1:Mc
for i = 1:Ns-1
t = double(i) .* dt ;
for j = 1:Ms-1
cnst2 = Gamma(i,j)./ cnst1;
% correction terms
delta = 1 + a1 .* sqrt(Gamma(i,j).^2./visc);
rc = sqrt(r02+(4.*alpha.*t.*visc.*delta));
%—— line 1
[Vx,Vy,Vz]=Vind_LINE(X0(i0,j0),Y0(i0,j0),Z0(i0,j0), X(i,j), Y(i,j), Z(i,j), X(i,j+1), Y(i,j+1), Z(i,j+1), cnst2, rc);
Vxs(i0,j0)=Vxs(i0,j0)+Vx; Vys(i0,j0)=Vys(i0,j0)+Vy; Vzs(i0,j0)=Vzs(i0,j0)+Vz;
%—— line 2
[Vx,Vy,Vz]=Vind_LINE(X0(i0,j0),Y0(i0,j0),Z0(i0,j0), X(i,j+1), Y(i,j+1), Z(i,j+1), X(i+1,j+1),Y(i+1,j+1),Z(i+1,j+1), cnst2, rc);
Vxs(i0,j0)=Vxs(i0,j0)+Vx; Vys(i0,j0)=Vys(i0,j0)+Vy; Vzs(i0,j0)=Vzs(i0,j0)+Vz;
%—— line 3
[Vx,Vy,Vz]=Vind_LINE(X0(i0,j0),Y0(i0,j0),Z0(i0,j0), X(i+1,j+1),Y(i+1,j+1),Z(i+1,j+1),X(i+1,j), Y(i+1,j), Z(i+1,j), cnst2, rc);
Vxs(i0,j0)=Vxs(i0,j0)+Vx; Vys(i0,j0)=Vys(i0,j0)+Vy; Vzs(i0,j0)=Vzs(i0,j0)+Vz;
%—— line4
[Vx,Vy,Vz]=Vind_LINE(X0(i0,j0),Y0(i0,j0),Z0(i0,j0), X(i+1,j), Y(i+1,j), Z(i+1,j), X(i,j), Y(i,j), Z(i,j), cnst2, rc);
Vxs(i0,j0)=Vxs(i0,j0)+Vx; Vys(i0,j0)=Vys(i0,j0)+Vy; Vzs(i0,j0)=Vzs(i0,j0)+Vz;
end
end
end
end
end
function [Vx,Vy,Vz] = Vind_LINE(x0,y0,z0,x1,y1,z1,x2,y2,z2,cnst, rc)
tol = 1e-8;
Vx = 0.;Vy = 0.;Vz = 0.;
rx1 = x0-x1; ry1 = y0-y1; rz1 = z0-z1;
rx2 = x0-x2; ry2 = y0-y2; rz2 = z0-z2;
crsr1r2x = (ry1 .* rz2) – (rz1 .* ry2);
crsr1r2y = (rz1 .* rx2) – (rx1 .* rz2);
crsr1r2z = (rx1 .* ry2) – (ry1 .* rx2);
magcrsr1r2 = (crsr1r2x.^2 + crsr1r2y.^2 + crsr1r2z.^2);
if magcrsr1r2 > tol
Frc1x = crsr1r2x ./ magcrsr1r2;
Frc1y = crsr1r2y ./ magcrsr1r2;
Frc1z = crsr1r2z ./ magcrsr1r2;
magr1 = sqrt(rx1.^2 + ry1.^2 + rz1.^2);
magr2 = sqrt(rx2.^2 + ry2.^2 + rz2.^2);
r1minusr2x = rx1-rx2;
r1minusr2y = ry1-ry2;
r1minusr2z = rz1-rz2;
Frc2 = r1minusr2x.* ((rx1./magr1)-(rx2./magr2)) + r1minusr2y.*((ry1./magr1)-(ry2./magr2)) + r1minusr2z.*((rz1./magr1)-(rz2./magr2)) ;
%% correction term
magr0 = sqrt(r1minusr2x.^2 + r1minusr2y.^2 + r1minusr2z.^2);
h = magcrsr1r2 ./ magr0;
k = h.^2./(rc.^2 +h.^2);
%%
Vx = cnst .* Frc1x .* Frc2 .* k;
Vy = cnst .* Frc1y .* Frc2 .* k;
Vz = cnst .* Frc1z .* Frc2 .* k;
end
end
========================== % .c mex code
#include <stdio.h>
#include <math.h>
#include "mex.h"
#include "matrix.h"
#include <omp.h>
static void Vind_LINE(double x0, double b_y0, double z0, double x1, double b_y1,
double z1, double x2, double y2, double z2, double cnst,
double rc, double *Vx, double *Vy, double *Vz)
{
double rx1; double ry1; double rz1; double rx2; double ry2; double rz2;
double crsr1r2x; double crsr1r2y; double crsr1r2z; double magcrsr1r2;
double magr1; double magr2; double r1minusr2x; double r1minusr2y; double r1minusr2z;
*Vx = 0.0;
*Vy = 0.0;
*Vz = 0.0;
/* Bounde vortex */
/* r0x = x2-x1; r0y = y2-y1; r0z = z2-z1; */
rx1 = x0 – x1;
ry1 = b_y0 – b_y1;
rz1 = z0 – z1;
rx2 = x0 – x2;
ry2 = b_y0 – y2;
rz2 = z0 – z2;
crsr1r2x = ry1 * rz2 – rz1 * ry2;
crsr1r2y = rz1 * rx2 – rx1 * rz2;
crsr1r2z = rx1 * ry2 – ry1 * rx2;
magcrsr1r2 = (crsr1r2x * crsr1r2x + crsr1r2y * crsr1r2y) + crsr1r2z * crsr1r2z;
if (magcrsr1r2 > 1.0E-8) {
magr1 = sqrt((rx1 * rx1 + ry1 * ry1) + rz1 * rz1);
magr2 = sqrt((rx2 * rx2 + ry2 * ry2) + rz2 * rz2);
r1minusr2x = rx1 – rx2;
r1minusr2y = ry1 – ry2;
r1minusr2z = rz1 – rz2;
magr2 = (r1minusr2x * (rx1 / magr1 – rx2 / magr2) + r1minusr2y * (ry1 /
magr1 – ry2 / magr2)) + r1minusr2z * (rz1 / magr1 – rz2 / magr2);
/* %% correction term */
magr1 = magcrsr1r2 / sqrt((r1minusr2x * r1minusr2x + r1minusr2y * r1minusr2y)
+ r1minusr2z * r1minusr2z);
magr1 *= magr1;
magr1 /= rc * rc + magr1;
/* %% */
*Vx = cnst * (crsr1r2x / magcrsr1r2) * magr2 * magr1;
*Vy = cnst * (crsr1r2y / magcrsr1r2) * magr2 * magr1;
*Vz = cnst * (crsr1r2z / magcrsr1r2) * magr2 * magr1;
}
}
/*
void mexFunction(int nlhs, mxArray *plhs[],
int nrhs, const mxArray *prhs[])
{
double *X0, *Y0, *Z0, *X, *Y, *Z, *Gamma;
double dt;
int Nc, Mc , Ns, Ms, NGamma, Nss, NV;
X0 = mxGetDoubles (prhs[0]);
Nc = mxGetM(prhs[0]);
Mc = mxGetN(prhs[0]);
Y0 = mxGetDoubles (prhs[1]);
Z0 = mxGetDoubles (prhs[2]);
X = mxGetDoubles (prhs[3]);
Ns = mxGetM(prhs[3]);
Ms = mxGetN(prhs[3]);
Y = mxGetDoubles (prhs[4]);
Z = mxGetDoubles (prhs[5]);
Gamma = mxGetDoubles (prhs[6]);
NGamma = mxGetM(prhs[6]);
dt = mxGetScalar (prhs[7]);
/* OUTPUTS */
plhs[0] = mxCreateDoubleMatrix(Nc, Mc, mxREAL);
plhs[1] = mxCreateDoubleMatrix(Nc, Mc, mxREAL);
plhs[2] = mxCreateDoubleMatrix(Nc, Mc, mxREAL);
//double Vxs[Nc][Mc], Vys[Nc][Mc], Vzs[Nc][Mc];
// double *Vxs, *Vys, *Vzs;
double* Vxs = (double*) mxGetDoubles(plhs[0]);
double* Vys = (double*) mxGetDoubles(plhs[1]);
double* Vzs = (double*) mxGetDoubles(plhs[2]);
int i0; int loop_ub; int i1; int b_j0; int i2; int i; double t; int i3; int j;
double cnst2; double rc; double Vx; double Vy; double Vz;
for (loop_ub = 0; loop_ub < Nc; loop_ub++) {
for (b_j0 = 0; b_j0 < Mc; b_j0++) {
for (i = 0; i <= Ns – 2; i++) {
i3 = Ms;
for (j = 0; j <= i3 – 2; j++) {
cnst2 = Gamma[i + NGamma * j] / 12.566370614359172;
/* correction terms */
rc = Gamma[i + NGamma * j] ;
rc = sqrt(0.0001 + 5.02572 * t * 1.802E-5 * (1.0 + 0.0002 * sqrt(rc *
rc / 1.802E-5)));
Vind_LINE(X0[loop_ub + Nc * b_j0], Y0[loop_ub + Nc * b_j0], Z0[loop_ub + Nc * b_j0],
X[i + Ns * j], Y[i + Ns * j], Z[i + Ns * j],
X[i + Ns * (j + 1)], Y[i + Ns * (j + 1)], Z[i +Ns * (j + 1)],
cnst2, rc, &Vx, &Vy, &Vz);
Vxs[loop_ub + Nc * b_j0] += Vx;
Vys[loop_ub + Nc * b_j0] += Vy;
Vzs[loop_ub + Nc * b_j0] += Vz;
Vind_LINE(X0[loop_ub + Nc * b_j0], Y0[loop_ub + Nc * b_j0], Z0[loop_ub + Nc * b_j0],
X[i + Ns * (j + 1)], Y[i + Ns *(j + 1)], Z[i + Ns * (j + 1)],
X[(i + Ns * (j + 1)) + 1], Y[(i + Ns * (j + 1)) + 1], Z[(i + Ns * (j + 1)) + 1],
cnst2, rc, &Vx, &Vy, &Vz);
Vxs[loop_ub + Nc * b_j0] += Vx;
Vys[loop_ub + Nc * b_j0] += Vy;
Vzs[loop_ub + Nc * b_j0] += Vz;
Vind_LINE(X0[loop_ub + Nc * b_j0], Y0[loop_ub + Nc * b_j0], Z0[loop_ub + Nc * b_j0],
X[(i + Ns * (j + 1)) + 1], Y[(i + Ns * (j + 1)) + 1], Z[(i + Ns * (j +1)) + 1],
X[(i + Ns * j) + 1], Y[(i + Ns * j) + 1], Z[(i + Ns * j) + 1],
cnst2, rc, &Vx, &Vy, &Vz);
Vxs[loop_ub + Nc * b_j0] += Vx;
Vys[loop_ub + Nc * b_j0] += Vy;
Vzs[loop_ub + Nc * b_j0] += Vz;
Vind_LINE(X0[loop_ub + Nc * b_j0], Y0[loop_ub + Nc * b_j0], Z0[loop_ub + Nc * b_j0],
X[(i +Ns * j) + 1], Y[(i+ Ns * j) + 1], Z[(i + Ns * j) + 1],
X[i + Ns * j], Y[i + Ns * j], Z[i + Ns * j],
cnst2, rc, &Vx, &Vy, &Vz);
Vxs[loop_ub + Nc * b_j0] += Vx;
Vys[loop_ub + Nc * b_j0] += Vy;
Vzs[loop_ub + Nc * b_j0] += Vz;
}
}
}
}
mxFree(Vxs);
mxFree(Vys);
mxFree(Vzs);
}
/*
* File trailer for Induced_Velocity_Tret3.c
*
* [EOF]
*/
Best Answer