function W = lofdd(a, fracrej, k, distmat, sD)
if (nargin < 5)
distmat = [];
sD = [];
end
if nargin < 3 || isempty(k), k = 3; end
if nargin < 2 || isempty(fracrej), fracrej = 0.05; end
if nargin < 1 || isempty(a)
W = mapping(mfilename,{fracrej,k});
W = setname(W,sprintf('LOF k:%d', k));
return
end
if ~ismapping(fracrej)
a = +target_class(a);
[m,d] = size(a);
if (m<2)
warning('dd_tools:InsufficientData','Dataset contains less than 2 objects');
end
if (k>=m)
error(['More neighbors than training samples are requested! (max=',num2str(m-1),')']);
end
if isa(k,'char')
error('Argument k should define the number of neighbors');
end
if (k<1)
warning('dd_tools:KNegativeK','K must be positive (>0)');
end
if(isempty(distmat) || isempty(sD))
distmat = sqrt(sqeucldistm(a,a));
[sD,I] = sort(distmat,2);
end
k_distance = sD(:,k+1);
k_distance_neighborhood = zeros(m,m);
for p = 1:m
k_distance_neighborhood(p,:) = logical(distmat(p,:) <= k_distance(p));
k_distance_neighborhood(p,p) = 0;
end
k_distance_neighborhood_size = sum(k_distance_neighborhood,2);
reachability_distance = zeros(m,m);
for p = 1:m
for o = 1:m
reachability_distance(p,o) = max(k_distance(o), distmat(p,o));
end
end
local_reachability_density = zeros(m,1);
for p = 1:m
local_reachability_density(p) = 1 ./ (1e-10+(sum(reachability_distance(p,logical(k_distance_neighborhood(p,:))) / k_distance_neighborhood_size(p))));
end
lof = zeros(m,1);
for p = 1:m
lof(p) = sum(local_reachability_density(logical(k_distance_neighborhood(p,:))) / local_reachability_density(p)) / k_distance_neighborhood_size(p);
end
fit = lof;
thresh = dd_threshold(fit,1-fracrej);
W.distmat = distmat;
W.sD = sD;
W.k_distance = k_distance;
W.local_reachability_density = local_reachability_density;
W.lof = lof;
W.x = +a;
W.k = k;
W.threshold = thresh;
W.scale = mean(fit);
W = mapping(mfilename,'trained',W,str2mat('target','outlier'),d,2);
W = setname(W,sprintf('LOF k:%d', k));
else
W = getdata(fracrej);
[m,d] = size(a);
[n,d] = size(W.x);
if(isempty(distmat) || isempty(sD))
distmat = sqrt(sqeucldistm(+a,W.x));
[sD,I] = sort(distmat,2);
end
new_train_distmat = zeros(n+1,n+1);
new_train_distmat(1:n,1:n) = W.distmat;
k_distance = sD(:,W.k);
k_distance_neighborhood = zeros(m,n);
for p = 1:m
k_distance_neighborhood(p,:) = logical(distmat(p,:) <= k_distance(p));
end
lof = zeros(m,1);
for p = 1:m
new_train_distmat(n+1,1:n) = distmat(p,:);
new_train_distmat(1:n,n+1) = distmat(p,:)';
[new_train_sD, I] = sort(new_train_distmat, 2);
new_train_k_distance = new_train_sD(:,W.k+1);
neighbors_of_p = [n+1, find(logical(k_distance_neighborhood(p,:)))];
lrd_of_nn_of_p = zeros(numel(neighbors_of_p), 1);
sum_lrd_fraction = 0;
nn_index = 0;
for nn = neighbors_of_p
nn_index = nn_index + 1;
neighbors_of_neighbors_of_p = logical(new_train_distmat(nn,:) <= new_train_k_distance(nn));
neighbors_of_neighbors_of_p(nn) = 0;
sum_reach_dist = 0;
num_nn_nn = 0;
for nn_nn = find(neighbors_of_neighbors_of_p)
num_nn_nn = num_nn_nn + 1;
sum_reach_dist = sum_reach_dist + max(new_train_k_distance(nn_nn), new_train_distmat(nn, nn_nn));
end
lrd = 1 / ((sum_reach_dist + 1e-10) / num_nn_nn);
lrd_of_nn_of_p(nn_index) = lrd;
if(nn_index > 1),
sum_lrd_fraction = sum_lrd_fraction + (lrd / lrd_of_nn_of_p(1));
end
end
lof(p) = sum_lrd_fraction / (nn_index-1);
end
ind = lof;
out = [ind repmat(W.threshold,[m,1])];
W = setdat(a,-out,fracrej);
W = setfeatdom(W,{[-inf 0;-inf 0] [-inf 0; -inf 0]});
end
return
Best Answer