-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathdemo_LWEA_and_LWGP.m
154 lines (127 loc) · 5.68 KB
/
demo_LWEA_and_LWGP.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% %
% This is a demo for the LWEA and LWGP algorithms. If you find this %
% code useful for your research, please cite the paper below. %
% %
% Dong Huang, Chang-Dong Wang, and Jian-Huang Lai. %
% "Locally weighted ensemble clustering." %
% IEEE Transactions on Cybernetics, 2018, 48(5), pp.1460-1473. %
% %
% The code has been tested in Matlab R2014a and Matlab R2015a on a %
% workstation with Windows Server 2008 R2 64-bit. %
% %
% https://www.researchgate.net/publication/316681928 %
% %
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function demo_LWEA_and_LWGP()
%DEMO_LWEA_AND_LWGP  Demo for the LWEA and LWGP ensemble clustering algorithms.
%
%   demo_LWEA_and_LWGP() loads a pre-generated pool of base clusterings
%   ('bc_pool_<dataName>.mat', variables 'members' and 'gt'), repeatedly
%   draws M base clusterings at random from the pool, runs LWGP and LWEA
%   on each drawn ensemble for every cluster number in clsNums, and
%   reports NMI scores against the ground truth gt.
%
%   Side effects:
%     * Clears the workspace, closes all figures and clears the command
%       window (kept from the original demo).
%     * Writes 'results_<dataName>.mat' after every run, containing bcIdx
%       and the four NMI score vectors (entries of runs not yet executed
%       are zero).
%
%   Errors:
%     demo_LWEA_and_LWGP:poolTooSmall   - the pool has fewer than M columns
%                                         (also raised when 'members' could
%                                         not be loaded).
%     demo_LWEA_and_LWGP:trueKNotInRange - the number of distinct labels in
%                                         gt is not one of clsNums.

clear all; %#ok<CLALL>  % kept so the demo behaves as before
close all;
clc;

%% Load the base clustering pool.
% We have generated a pool of 100 candidate base clusterings for each dataset.
% Please uncomment the dataset you want to use, and comment the other ones.
% If you don't want to use the pre-generated base clusterings, you may as
% well generate your own base clusterings by using k-means or any other
% clustering algorithms.
% dataName = 'VS';
% dataName = 'Semeion';
% dataName = 'SPF';
dataName = 'MF';
% dataName = 'IS';
% dataName = 'Caltech20';
% dataName = 'FCT';
% dataName = 'MNIST';
% dataName = 'Texture';
% dataName = 'ODR';
% dataName = 'LS';
% dataName = 'ISOLET';
% dataName = 'PD';
% dataName = 'USPS';
% dataName = 'LR';

% Pre-declare the variables that load() will overwrite in this workspace.
members = [];
gt = [];
load(['bc_pool_',dataName,'.mat'],'members','gt');
% One row per data object, one column per base clustering in the pool.
[N, poolSize] = size(members);

%% Parameter
para_theta = 0.4;

%% Settings
% Ensemble size M
M = 10;
% How many times the LWEA and LWGP algorithms will be run.
cntTimes = 10;
% You can set cntTimes to a greater (or smaller) integer if you want to run
% the algorithms more (or less) times.

% The numbers of clusters.
clsNums = 2:30;

%% Validate the settings before doing any expensive work.
% Without this check, tmp(1:M) below fails with an opaque indexing error.
if poolSize < M
    error('demo_LWEA_and_LWGP:poolTooSmall', ...
        'The pool in bc_pool_%s.mat has %d base clusterings, but M = %d are required.', ...
        dataName, poolSize, M);
end

% The true number of clusters does not change between runs, so compute it
% (and the position of its score) once.
trueK = numel(unique(gt));
trueKMask = (clsNums == trueK);
% Without this check, the true-k score lookup yields an empty value and the
% scalar assignment fails only after a full run has been computed.
if ~any(trueKMask)
    error('demo_LWEA_and_LWGP:trueKNotInRange', ...
        'The true number of clusters (%d) is not in clsNums (%d to %d).', ...
        trueK, min(clsNums), max(clsNums));
end

%% Draw the ensembles.
% For each run, M base clusterings will be randomly drawn from the pool.
% Each row in bcIdx corresponds to an ensemble of M base clusterings.
bcIdx = zeros(cntTimes, M);
for i = 1:cntTimes
    tmp = randperm(poolSize);
    bcIdx(i,:) = tmp(1:M);
end

%% Run LWEA and LWGP repeatedly.
% Scores
nmiScoresBestK_LWEA = zeros(cntTimes, 1);
nmiScoresTrueK_LWEA = zeros(cntTimes, 1);
nmiScoresBestK_LWGP = zeros(cntTimes, 1);
nmiScoresTrueK_LWGP = zeros(cntTimes, 1);

for runIdx = 1:cntTimes
    disp('**************************************************************');
    disp(['Run ', num2str(runIdx),':']);
    disp('**************************************************************');

    %% Construct the ensemble of M base clusterings
    % baseCls is an N x M matrix, each column being a base clustering.
    baseCls = members(:,bcIdx(runIdx,:));

    %% Get all clusters in the ensemble
    [bcs, baseClsSegs] = getAllSegs(baseCls);

    %% Compute ECI
    disp('Compute ECI ... ');
    tic;
    ECI = computeECI(bcs, baseClsSegs, para_theta);
    toc;

    %% Compute LWCA
    LWCA = computeLWCA(baseClsSegs, ECI, M);

    %% Perform LWGP
    disp('Run the LWGP algorithm ... ');
    resultsLWGP = runLWGP(bcs, baseClsSegs, ECI, clsNums);
    disp('--------------------------------------------------------------');

    %% Perform LWEA
    disp('Run the LWEA algorithm ... ');
    resultsLWEA = runLWEA(LWCA, clsNums);
    % The i-th column in resultsLWEA represents the consensus clustering
    % with clsNums(i) clusters by LWEA.
    disp('--------------------------------------------------------------');

    %% Display the clustering results.
    disp('##############################################################');
    scoresLWGP = computeNMI(resultsLWGP,gt);
    scoresLWEA = computeNMI(resultsLWEA,gt);

    % Best-k: the highest score over all tested cluster numbers.
    % True-k: the score at the ground-truth number of clusters.
    nmiScoresBestK_LWEA(runIdx) = max(scoresLWEA);
    nmiScoresTrueK_LWEA(runIdx) = scoresLWEA(trueKMask);
    nmiScoresBestK_LWGP(runIdx) = max(scoresLWGP);
    nmiScoresTrueK_LWGP(runIdx) = scoresLWGP(trueKMask);

    disp(['The Scores at Run ',num2str(runIdx)]);
    disp(' ---------- The NMI scores w.r.t. best-k: ---------- ');
    disp(['LWGP : ',num2str(nmiScoresBestK_LWGP(runIdx))]);
    disp(['LWEA : ',num2str(nmiScoresBestK_LWEA(runIdx))]);
    disp(' ---------- The NMI scores w.r.t. true-k: ---------- ');
    disp(['LWGP : ',num2str(nmiScoresTrueK_LWGP(runIdx))]);
    disp(['LWEA : ',num2str(nmiScoresTrueK_LWEA(runIdx))]);
    disp('##############################################################');

    %% Save results
    % Saved after every run so that partial results survive an interruption.
    save(['results_',dataName,'.mat'],'bcIdx','nmiScoresBestK_LWEA','nmiScoresTrueK_LWEA','nmiScoresBestK_LWGP','nmiScoresTrueK_LWGP');
end

%% Report the average performance over all runs.
disp('**************************************************************');
disp(['** Average Performance over ',num2str(cntTimes),' runs on the ',dataName,' dataset **']);
disp(['Data size: ', num2str(N)]);
disp(['Ensemble size: ', num2str(M)]);
disp(' ---------- Average NMI scores w.r.t. best-k: ---------- ');
disp(['LWGP : ',num2str(mean(nmiScoresBestK_LWGP))]);
disp(['LWEA : ',num2str(mean(nmiScoresBestK_LWEA))]);
disp(' ---------- Average NMI scores w.r.t. true-k: ---------- ');
disp(['LWGP : ',num2str(mean(nmiScoresTrueK_LWGP))]);
disp(['LWEA : ',num2str(mean(nmiScoresTrueK_LWEA))]);
disp('**************************************************************');
disp('**************************************************************');