Hi, I am writing a simple MEX function that performs a multiplication and summation of a specified matrix column vector on another vector. I've been able to get a 500 times speed up by writing the MEX function but I've been getting some mixed results. Basically, if I add (or remove) mexPrintf lines to print out variables, it completely changes the output results and I'm quite lost. It must be some issue with pointers and referencing since using mexPrintf shouldn't change my actual answers. The result that I get is complete garbage irregardless (I get 10^276, 10^358, -Inf, NaN sort of numbers).
I'm at a point where I simply don't know what I'm doing wrong and I need someone else to look at the code and see if there's anything obviously wrong. It seems like the problem is when I'm using the funciton: "sparseMatColVec" to calculate the variable "int_dv" in the main mexFunction program is when I get garbages. i.e. if I say replace the call of sparseMatColVec and simply assign int_dv = 1.0 where ever sparseMatColVec is called, I get the expected answer. When I use sparseMatColVec to calculate "int_dv", I get 10^276, 10^358, -Inf, NaN sort of numbers).
I'm attaching my code below. Thank you.
NOTE: The funciton, "sparseMatColVec" is just a short subroutine that will perform the column vector, vector multiplication and summation. The gateway mexFunction follows directly below it.
//========================================================================
// This code will perform a fast C++ Row vector,RHS vector multiplication for the calculation of the first three velocity moments of the kinetic system at physical cell faces.
#include "mex.h"
#include "math.h"
#include "matrix.h"
// Subfunction to actually perform the matrix row vector, vector multiplication for the specified row index
double sparseMatColVec(double* f, mwIndex* ir, mwIndex* jc, double* s, int ncol, int c_id)
{
double int_dv; double rhs; int start= jc[c_id]; int stop = jc[c_id+1]; for (int k = start; k < stop; k++) { // Loop through non-zeros in c_id'th column // rhs = s[k]*f[ir[k]]; int_dv += rhs; // mexPrintf(""); // <- I don't know why but by either adding or removing this comment will make "some" difference in the result. Utterly confused. } return int_dv;}
// The main MATLAB gateway function
void mexFunction( int nlhs, mxArray *plhs[],int nrhs, const mxArray *prhs[]) {
// Inputs: // 0: A_rhs note: this is the transposed matrix of original matrix // 1: A_lhs note: this is the transposed matrix of original matrix // 2: f note: RHS of linear system // 3: f_bl note: left B.C. array // 4: f_br note: right B.C. array // 5: nx note: number of grids in physical space // 6: dx note: grid size // 7: nvx note: number of grids in velocity space // 8: bc_l note: boundary condition type integer left // 9: bc_r note: boundary condition type integer right // 10: v note: array for velocity space grid // 11: vop note: constant of multiplication // 12: dv note: velocity space grid size // Outputs // 0: nu_flux note: nu flux at cell faces // 1: S2_flux note: S2 flux at cell faces // 2: S3_flux note: S3 flux at cell faces // A_rhs matrix pointer int ncol_rhs= mxGetN(prhs[0]); mwIndex* ir_rhs = mxGetIr(prhs[0]); mwIndex* jc_rhs = mxGetJc(prhs[0]); double* s_rhs = mxGetPr(prhs[0]); // A_lhs matrix pointers int ncol_lhs= mxGetN(prhs[1]); mwIndex* ir_lhs = mxGetIr(prhs[1]); mwIndex* jc_lhs = mxGetJc(prhs[1]); double* s_lhs = mxGetPr(prhs[1]); // Extract inputs double* f = mxGetPr(prhs[2]); double* f_bl = mxGetPr(prhs[3]); double* f_br = mxGetPr(prhs[4]); int nx = mxGetScalar(prhs[5]); double* dx = mxGetPr(prhs[6]); int nvx = mxGetScalar(prhs[7]); int bc_l = mxGetScalar(prhs[8]); int bc_r = mxGetScalar(prhs[9]); double* v = mxGetPr(prhs[10]); double vop = mxGetScalar(prhs[11]); double dv = mxGetScalar(prhs[12]); // Initialize outputs plhs[0] = mxCreateDoubleMatrix(nx+1,1,mxREAL); plhs[1] = mxCreateDoubleMatrix(nx+1,1,mxREAL); plhs[2] = mxCreateDoubleMatrix(nx+1,1,mxREAL); // Associate output pointer double* nu_flux = mxGetPr(plhs[0]); double* S2_flux = mxGetPr(plhs[1]); double* S3_flux = mxGetPr(plhs[2]); // Define some constants int nxp1 = nx+1; int nxm1 = nx-1; int nvxt2 = nvx*2; int i, ip1, j, c_id; double nu_factor, S2_factor, S3_factor; double int_dv; for (i = 0; i < nx; i++) { ip1 = i+1; for (j = 0; j < nvxt2; j++) { c_id = i*nvxt2 + j; // cell id nu_factor = 1.0; // factor for nu_flux S2_factor = v[j]; // factor for S2_flux S3_factor = 0.5*v[j]*v[j]; // factor for S3_flux if (v[j] <= 0.0) { // if velocity is negative int_dv = sparseMatColVec(f, ir_rhs, jc_rhs, s_rhs, ncol_rhs, c_id)*dx[i]; nu_flux[ip1]+= int_dv*nu_factor; S2_flux[ip1]+= int_dv*S2_factor; S3_flux[ip1]+= int_dv*S3_factor; if (i == 0) { int_dv = sparseMatColVec(f, ir_lhs, jc_lhs, s_lhs, ncol_lhs, c_id)*dx[i]; nu_flux[i]+= int_dv; S2_flux[i]+= int_dv; S3_flux[i]+= int_dv; } } else if (v[j] > 0.0) { // if veloicyt is positive int_dv = sparseMatColVec(f, ir_lhs, jc_lhs, s_lhs, ncol_lhs, c_id)*dx[i]; nu_flux[i]+= int_dv*nu_factor; S2_flux[i]+= int_dv*S2_factor; S3_flux[i]+= int_dv*S3_factor; if (i == nxm1) { int_dv = sparseMatColVec(f, ir_rhs, jc_rhs, s_rhs, ncol_rhs, c_id)*dx[i]; nu_flux[ip1]+= int_dv*nu_factor; S2_flux[ip1]+= int_dv*S2_factor; S3_flux[ip1]+= int_dv*S3_factor; } } // velocity if else if check } // loop through velocity grid } // loop through configuration grid}
//========================================================================
Best Answer