ကြိုက်တဲ့ အကွာအဝေးတိုင်းတဲ့နည်းကို သုံးပြီး k-means clustering လုပ်ပေးတဲ့ MATLAB ဖန်ရှင်လေးတစ်ခု ရေးကြည့်ထားပါတယ်။ ဥပမာ histograms တွေကို ခွဲခြားဖို့ဆိုရင် chi-square distance ကို သုံးတာက ပိုကောင်းချင်ကောင်းနိုင်ပါတယ်။ အောက်က နမူနာမှာ chi-square distance သုံးထားတာကို ပြထားပေမယ့် အဲဒီ distance ဖန်ရှင်နေရာမှာ ကိုယ်ကြိုက်တဲ့နည်းကို အစားထိုးပြီး သုံးလို့ရပါတယ်။
% Demo script for KMeansCustom: cluster two 2-D Gaussian blobs and plot them.
% Two groups of 100 points each, shifted by +1 and -1 in both coordinates.
X = [randn(100,2) + 1; randn(100,2) - 1];
[idx,ctrs] = KMeansCustom(X,2);
% Uncomment the next line (and comment the one above) to use kmeans instead.
%[idx,ctrs] = kmeans(X,2);
% Points assigned to cluster 1 in red, cluster 2 in blue.
plot(X(idx==1,1), X(idx==1,2), 'r.', 'MarkerSize',12)
hold on
plot(X(idx==2,1), X(idx==2,2), 'b.', 'MarkerSize',12)
% Centroids drawn as thick black crosses on top of the points.
plot(ctrs(:,1), ctrs(:,2), 'kx', 'MarkerSize',12, 'LineWidth',2)
legend('Cluster 1','Cluster 2','Centroids', 'Location','NW')
function [Idx,C]=KMeansCustom(X,k,distFun)
%KMeansCustom partitions the points in the n-by-d data matrix X into k clusters
%using a caller-selectable distance measure.
%[Idx,C]= KMeansCustom(X,k) returns
%n-by-1 vector Idx containing the cluster index of each point and
%k-by-d matrix C containing the k cluster centroid locations.
%For n sample points with d dimensions in each point, X has n rows and d columns.
%[Idx,C]= KMeansCustom(X,k,distFun) uses the function handle distFun in place of
%the default ChiDist. distFun(a,b) is called with two 1-by-d row vectors and
%must return a scalar distance.
%k is rounded to the nearest integer and must satisfy 1 <= k <= n.
%A cluster that receives no points in an iteration keeps its previous centroid.
%File name: KMeansCustom.m
%Author: Yan Naing Aye
%Website: http://cool-emerald.blogspot.sg/
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%Default distance measure
if nargin<3 || isempty(distFun)
    distFun=@ChiDist;
end
%Define maximum number of iterations
MaxIter=500;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
n=size(X,1);
k=round(k);
%The comparison is written so that a NaN k is rejected as well
if ~isscalar(k) || ~(k>=1 && k<=n)
    error('KMeansCustom:invalidK', ...
        'k must be a scalar between 1 and the number of rows of X.');
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%step1 :arbitrarily choose k distinct samples as the initial cluster centers
p=randperm(n);
Mu=X(p(1:k),:);
%D(j,i) holds the distance from sample i to center j, so it is k-by-n
D=zeros(k,n);
IndexMin=zeros(1,n);
for t=1:MaxIter
    %step2:distribute the samples X to the clusters
    for j=1:k
        for i=1:n
            D(j,i)=distFun(X(i,:),Mu(j,:));%Use custom distance
        end
    end
    %Reduce along dimension 1 explicitly; with k==1 a plain min(D) would
    %reduce along the single row and return one scalar instead of n indices
    [~,IndexMin]=min(D,[],1);
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    %step 3: update the cluster centers
    OldMu=Mu;
    for j=1:k
        members=(IndexMin==j);
        if any(members)
            %Average over rows explicitly; without the dimension argument a
            %single-member cluster (1-by-d) would be averaged across its
            %coordinates and collapse to a scalar
            Mu(j,:)=mean(X(members,:),1);
        end
        %An empty cluster keeps its old center instead of becoming NaN
    end
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    %step4 :check convergence (centers did not move at all)
    if isequal(Mu,OldMu)
        break
    end
end
Idx=IndexMin(:);
C=Mu;
end
function d=ChiDist(v1,v2)
%ChiDist chi-square style distance between v1 and v2.
%   For vector inputs, d is half the sum over elements of
%   (v1-v2).^2 ./ (abs(v1)+abs(v2)).
% Build the denominator first. Where both inputs are zero it would be zero,
% so substitute a tiny positive value there (the numerator is 0 at those
% positions, so the term still contributes nothing).
denom=abs(v1)+abs(v2);
denom(denom==0)=1e-9;
% Squared element-wise difference, scaled by the denominator and halved.
numer=(v1-v2).^2;
d=sum(numer./denom)./2;
end
No comments:
Post a Comment