MATLAB: Error using zeros, Maximum variable size allowed by the program is exceeded.

Error using zeros Maximum variable size allowed by the program is exceeded.

Error in knnclassify>distfun (line 216) D = zeros(n,size(Test,1));

Code:

function class = knnclassify(sample, TRAIN, group, K, distance,tieBreaker)
%KNNCLASSIFY K-Nearest Neighbor Classifier.
%
%   CLASS = KNNCLASSIFY(SAMPLE,TRAINING,GROUP) classifies each row of the
%   data in SAMPLE into one of the groups in TRAINING using the nearest
%   neighbour method. SAMPLE and TRAINING must be matrices with the same
%   number of columns. GROUP is a grouping variable for TRAINING. Its
%   unique values define groups, and each element defines the group to
%   which the corresponding row of TRAINING belongs. GROUP can be a
%   numeric vector, a string array, or a cell array of strings. TRAINING
%   and GROUP must have the same number of rows. KNNCLASSIFY treats NaNs or
%   empty strings in GROUP as missing values, and ignores the corresponding
%   rows of TRAINING. CLASS indicates which group each row of SAMPLE has
%   been assigned to, and is of the same type as GROUP.
%
%   CLASS = KNNCLASSIFY(SAMPLE,TRAINING,GROUP,K) allows you to specify K,
%   the number of nearest neighbors used in the classification. K must be
%   a positive integer no larger than the number of usable TRAINING rows.
%   The default is 1.
%
%   CLASS = KNNCLASSIFY(SAMPLE,TRAINING,GROUP,K,DISTANCE) allows you to
%   select the distance metric. Choices are:
%       {'sqEuclidean'} - Squared Euclidean distance
%       'cityblock'     - Sum of absolute differences, a.k.a. L1
%       'cosine'        - One minus the cosine of the included angle
%                         between points (treated as vectors)
%       'correlation'   - One minus the sample correlation between
%                         points (treated as sequences of values)
%       'Hamming'       - Percentage of bits that differ (only
%                         suitable for binary data)
%   Names are case-insensitive and may be abbreviated unambiguously.
%
%   CLASS = KNNCLASSIFY(SAMPLE,TRAINING,GROUP,K,DISTANCE,TIEBREAK) allows
%   you to specify the method used to break a tie in the number of nearest
%   neighbors. Options are 'random' (the default), which selects a random
%   tiebreaker, 'nearest', which uses the nearest neighbor amongst the tied
%   groups, and 'farthest', which uses the farthest of the K neighbors
%   amongst the tied groups.
%
%   Note: the complete size(SAMPLE,1)-by-size(TRAINING,1) distance matrix
%   is built in memory, so very large inputs can exceed MATLAB's maximum
%   variable size. Classify SAMPLE in batches of rows if that happens.
%
%   Examples:
%
%      % training data: two normal components
%      training = [mvnrnd([ 1  1],   eye(2), 100); ...
%                  mvnrnd([-1 -1], 2*eye(2), 100)];
%      group = [repmat(1,100,1); repmat(2,100,1)];
%      gscatter(training(:,1),training(:,2),group);hold on;
%
%      % some random sample data
%      sample = unifrnd(-5, 5, 100, 2);
%      % classify the sample using the nearest neighbor classification
%      c = knnclassify(sample, training, group);
%
%      gscatter(sample(:,1),sample(:,2),c,'mc'); hold on;
%      c3 = knnclassify(sample, training, group, 3);
%      gscatter(sample(:,1),sample(:,2),c3,'mc','o');

%   $Revision: $  $Date: 2002/04/11 14:24:07 $

if nargin < 3
    error('Requires at least three arguments.');
end

% grp2idx sorts a numeric grouping var ascending, and a string grouping
% var by order of first occurrence
[gindex,groups] = grp2idx(group);

% Validate shapes before touching TRAIN so that a mismatch produces these
% messages rather than an indexing failure during the row deletion below.
if size(gindex,1) ~= size(TRAIN,1)
    error('The length of GROUP must equal the number of rows in TRAINING.');
elseif size(sample,2) ~= size(TRAIN,2)
    error('SAMPLE and TRAINING must have the same number of columns.');
end

% Rows whose group is missing (NaN index) take no part in classification.
isMissing = isnan(gindex);
TRAIN(isMissing,:) = [];
gindex(isMissing) = [];

ngroups = length(groups);
n = size(TRAIN,1);
m = size(sample,1);

if nargin < 4 || isempty(K)
    K = 1;
elseif ~isnumeric(K)
    error('K must be numeric.');
elseif ~isscalar(K) || ~isreal(K) || K < 1 || K ~= floor(K)
    % NaN also lands here because NaN ~= floor(NaN) is true.
    error('K must be a positive integer scalar.');
end
if K > n
    error('K must not exceed the number of usable rows in TRAINING (%d).', n);
end

if nargin < 5 || isempty(distance)
    distance = 'sqeuclidean';
end

if ischar(distance) && size(distance,1) == 1
    distNames = {'sqeuclidean','cityblock','cosine','correlation','hamming'};
    % Case-insensitive prefix match, so abbreviations such as 'cos' work.
    i = find(strncmpi(distance, distNames, length(distance)));
    if length(i) > 1
        error('stats:knn:AmbiguousDistance', ...
            'Ambiguous ''distance'' parameter value:  %s.', distance);
    elseif isempty(i)
        error('stats:knn:UnknownDistance', ...
            'Unknown ''distance'' parameter value:  %s.', distance);
    end
    distance = distNames{i};
else
    error('stats:knn:InvalidDistance', ...
        'The ''distance'' parameter value must be a string.');
end

if nargin < 6 || isempty(tieBreaker)
    tieBreaker = 'random';
elseif ischar(tieBreaker) && size(tieBreaker,1) == 1
    tieNames = {'random','nearest','farthest'};
    i = find(strncmpi(tieBreaker, tieNames, length(tieBreaker)));
    if length(i) > 1
        error('stats:knn:AmbiguousTieBreaker', ...
            'Ambiguous ''tieBreaker'' parameter value:  %s.', tieBreaker);
    elseif isempty(i)
        error('stats:knn:UnknownTieBreaker', ...
            'Unknown ''tieBreaker'' parameter value:  %s.', tieBreaker);
    end
    tieBreaker = tieNames{i};
else
    error('stats:knn:InvalidTieBreaker', ...
        'The ''tieBreaker'' parameter value must be a string.');
end

% Distances from every sample row to every training row. distfun sizes its
% output by its first argument's rows, so dists is m-by-n: one row per
% sample, one column per training point.
dists = distfun(sample,TRAIN,distance);

if K > 1
    % Indices of the K nearest training rows for each sample, nearest first.
    [dSorted,dIndex] = sort(dists,2); %#ok<ASGLU>
    dIndex = dIndex(:,1:K);
    % Indexing the column vector gindex with a single-row index yields a
    % column, so force the m-by-K shape explicitly.
    classes = reshape(gindex(dIndex), m, K);

    % Count how many of the K neighbours fall in each group.
    counts = zeros(m,ngroups);
    for outer = 1:m
        for inner = 1:K
            g = classes(outer,inner);
            counts(outer,g) = counts(outer,g) + 1;
        end
    end
    [L,class] = max(counts,[],2);

    % A winner holding more than half of the K votes cannot be tied, so
    % only rows with L <= K/2 need checking.
    checkRows = find(L <= (K/2));
    for r = 1:numel(checkRows)
        row = checkRows(r);
        ties = counts(row,:) == L(row);
        numTies = sum(ties);
        if numTies > 1
            switch tieBreaker
                case 'random'
                    % pick one of the tied groups at random
                    choice = find(ties);
                    tb = randsample(numTies,1);
                    class(row) = choice(tb);
                case 'nearest'
                    % the closest neighbour belonging to a tied group wins
                    hit = find(ties(classes(row,:)), 1, 'first');
                    class(row) = classes(row,hit);
                case 'farthest'
                    % the farthest of the K neighbours belonging to a tied
                    % group wins
                    hit = find(ties(classes(row,:)), 1, 'last');
                    class(row) = classes(row,hit);
            end
        end
    end
else
    % Single nearest neighbour. Exactly equal distances are not treated
    % specially: min returns the first minimising column.
    [dSorted,dIndex] = min(dists,[],2); %#ok<ASGLU>
    class = gindex(dIndex);
end

% Convert back to original grouping variable
if isnumeric(group)
    % NOTE(review): values are recovered from the text labels returned by
    % grp2idx, so they are only as precise as those labels - confirm this
    % is acceptable for non-integer group values.
    groups = str2double(groups);
    class = groups(class);
elseif ischar(group)
    groups = char(groups);
    class = groups(class,:);
else %if iscellstr(group)
    class = groups(class);
end

function D = distfun(Train, Test, dist)
%DISTFUN Pairwise distances between the rows of two matrices.
%   D = DISTFUN(TRAIN, TEST, DIST) returns a size(TRAIN,1)-by-size(TEST,1)
%   matrix in which D(j,i) is the distance between TRAIN(j,:) and
%   TEST(i,:). TRAIN and TEST must have the same number of columns.
%   DIST is one of 'sqeuclidean', 'cityblock', 'cosine', 'correlation' or
%   'hamming' (exact, lower-case names).
%
%   The whole result is allocated up front, so its size is the product of
%   the two row counts; "Maximum variable size allowed by the program is
%   exceeded" from the ZEROS call means that product is too large.
%
%   Errors if a 'cosine'/'correlation' row has (near-)zero norm, if
%   'hamming' is given non-binary data, or if DIST is not recognised.

[n,p] = size(Train);
numTest = size(Test,1);
D = zeros(n,numTest);

switch dist
    case 'sqeuclidean'
        for i = 1:numTest
            D(:,i) = sum(bsxfun(@minus, Train, Test(i,:)).^2, 2);
        end
    case 'cityblock'
        for i = 1:numTest
            D(:,i) = sum(abs(bsxfun(@minus, Train, Test(i,:))), 2);
        end
    case {'cosine','correlation'}
        if strcmp(dist,'correlation')
            % Correlation distance is the cosine distance of the
            % mean-centred rows.
            Train = bsxfun(@minus, Train, mean(Train,2));
            Test = bsxfun(@minus, Test, mean(Test,2));
        end
        normTrain = sqrt(sum(Train.^2, 2));
        normTest = sqrt(sum(Test.^2, 2));
        % Check before dividing so zero-norm rows give a clear error
        % instead of NaN distances.
        if any(normTrain < eps) || any(normTest < eps)
            error('stats:knn:ZeroTestentroid', ...
                ['Cosine and correlation distances are undefined for rows ' ...
                 'with zero norm (constant rows for ''correlation'').']);
        end
        Train = bsxfun(@rdivide, Train, normTrain);
        % This can be done without a loop, but the loop saves memory allocations
        for i = 1:numTest
            D(:,i) = 1 - (Train * Test(i,:)') ./ normTest(i);
        end
    case 'hamming'
        if ~all(ismember(Train(:),[0 1])) || ~all(ismember(Test(:),[0 1]))
            error('Non-binary data cannot be clustered using Hamming distance.');
        end
        for i = 1:numTest
            D(:,i) = sum(abs(bsxfun(@minus, Train, Test(i,:))), 2) / p;
        end
    otherwise
        % Previously an unrecognised name silently produced all zeros.
        error('stats:knn:UnknownDistance', ...
            'Unknown ''distance'' parameter value:  %s.', dist);
end

Best Answer

The easiest way to debug this is probably to type

>> dbstop if error

before running your code. Then, execute your code, and it will halt when it encounters the error, and enter debug mode. You can then investigate your variables, to see what's going on.

In your case, given the error message, I am guessing that either n or size(Test,1) is much larger than you expect. You should be able to trace back from there.

Best Answer

Related Solutions

MATLAB: Value of ‘support’ parameter must have two elements.

MATLAB: Error in classifying using knn classify

Related Question