Open
Description
graphtools should provide certain canonical manifold learning datasets.
A reference MATLAB implementation, courtesy of Nick Marshall, is reproduced below:
function [X,C,name] = benchmark(code,N,alpha)
% BENCHMARK Generates manifold learning benchmark datasets.
%   [X,C,name] = benchmark(code,N,alpha) generates dataset X with color C
%   and description NAME.
%
%   Inputs
%     code  - integer 1-10, or a dataset name. Names are matched
%             case-insensitively with whitespace ignored; an unambiguous
%             prefix of a name is also accepted.
%     N     - requested number of points.
%     alpha - deformation parameter; its meaning depends on the dataset:
%
%      code  name                     role of alpha
%       1    'swissroll'              scales the third coordinate
%       2    'swisshole'              scales the third coordinate
%       3    'cornerplanes'           fold angle in degrees
%       4    'puncturedsphere'        scales the third coordinate
%       5    'twinpeaks'              scales the third coordinate
%       6    'toroidalhelix'          exponent applied to the curve parameter
%       7    'gaussianexample'        standard deviation of the samples
%       8    'uniformspacedellipse'   unused
%       9    'uniformrandomellipse'   unused
%      10    'barbell'                bar spans x in (1, 1+alpha/2)
%
%   Outputs
%     X    - one point per row; three columns for codes 1-7, two columns
%            for codes 8-10.
%     C    - column vector with one color value per row of X.
%     name - human readable dataset name.
%
%   Notes
%     * 'cornerplanes' lays points on a grid and returns
%       (floor(sqrt(N))+1)*floor(N/(floor(sqrt(N))+1)) rows, which can
%       differ from N.
%     * Codes 8 and 9 currently place points on the unit circle.
%     * NOTE(review): 'barbell' uses rejection sampling inside a box of
%       side 2+alpha/2; alpha is presumably expected to be non-negative.
%
%   An error with identifier 'benchmark:unknownCode' is raised when CODE
%   does not select exactly one dataset.
%
% Nicholas F Marshall - Dec 2015

% One entry per dataset; the position in this list is the numeric code.
dataNames = {'swissroll','swisshole','cornerplanes',...
    'puncturedsphere','twinpeaks','toroidalhelix',...
    'gaussianexample','uniformspacedellipse',...
    'uniformrandomellipse','barbell'};

if isnumeric(code)
    exampleValue = code;
else
    key = lower(char(code));
    key = key(~isspace(key));
    % Prefer an exact name; otherwise accept a prefix, as the previous
    % strmatch-based lookup did.
    exampleValue = find(strcmp(key, dataNames));
    if isempty(exampleValue)
        exampleValue = find(strncmp(key, dataNames, numel(key)));
    end
end

% Reject empty, ambiguous (several prefix hits) and out-of-range codes
% here, so the switch below always assigns every output.
if ~isscalar(exampleValue) || ~ismember(exampleValue, 1:numel(dataNames))
    error('benchmark:unknownCode', ...
        'Unknown or ambiguous dataset code; expected an integer 1-10 or a dataset name.');
end

switch exampleValue
    case 1 % Swiss Roll
        tt = (3*pi/2)*(1+2*rand(1,N));
        height = 21*rand(1,N);
        X = [tt.*cos(tt); height; alpha*tt.*sin(tt)]';
        C = tt';
        name = 'Swiss Roll';

    case 2 % Swiss Hole
        % Swiss Roll w/ hole example taken from Donoho & Grimes.
        % Candidates falling inside the rectangular hole are rejected.
        inHole = @(t,h) (t > 9) & (t < 12) & (h > 9) & (h < 14);
        tt = (3*pi/2)*(1+2*rand(1,2*N));
        height = 21*rand(1,2*N);
        keep = ~inHole(tt, height);
        tt = tt(keep);
        height = height(keep);
        % 2*N candidates are almost always enough; top up in the rare case
        % that fewer than N survive (only plausible for very small N).
        while numel(tt) < N
            extraT = (3*pi/2)*(1+2*rand(1,2*N));
            extraH = 21*rand(1,2*N);
            keep = ~inHole(extraT, extraH);
            tt = [tt, extraT(keep)]; %#ok<AGROW>
            height = [height, extraH(keep)]; %#ok<AGROW>
        end
        tt = tt(1:N);
        height = height(1:N);
        X = [tt.*cos(tt); height; alpha*tt.*sin(tt)]';
        C = tt';
        name = 'Swiss Hole';

    case 3 % Corner Planes
        xMax = floor(sqrt(N))+1;
        yMax = floor(N/xMax);
        cornerPoint = floor(yMax/2);
        % Grid enumerated with y varying fastest and x slowest.
        [yy, xx] = ndgrid(1:yMax, 1:xMax);
        xx = xx(:);
        yy = yy(:);
        X = [xx, yy, zeros(numel(yy),1)];
        % Rows beyond the corner are rotated out of the plane by alpha degrees.
        bent = yy > cornerPoint;
        X(bent,2) = cornerPoint + (yy(bent)-cornerPoint)*cos(pi*alpha/180);
        X(bent,3) = (yy(bent)-cornerPoint)*sin(pi*alpha/180);
        C = yy;
        name = 'Corner Planes';

    case 4 % Punctured Sphere by Saul & Roweis
        inc = 9/sqrt(N);
        [xx,yy] = meshgrid(-5:inc:5);
        rr2 = xx(:).^2 + yy(:).^2;
        % Keep the N grid points closest to the origin.
        [~, order] = sort(rr2);
        Y = [xx(order(1:N))'; yy(order(1:N))'];
        a = 4./(4+sum(Y.^2,1));
        X = [a.*Y(1,:); a.*Y(2,:); alpha*2*(1-a)]';
        C = X(:,3);
        name = 'Punctured Sphere';

    case 5 % Twin Peaks by Saul & Roweis
        xy = 1-2*rand(2,N);
        X = [xy; sin(pi*xy(1,:)).*tanh(3*xy(2,:))]';
        X(:,3) = alpha * X(:,3);
        C = X(:,3);
        name = 'Twin Peaks';

    case 6 % Toroidal Helix by Coifman & Lafon
        noiseSigma = 0.05; % noise parameter
        t = (1:N)'/N;
        t = t.^(alpha)*2*pi;
        X = [(2+cos(8*t)).*cos(t) (2+cos(8*t)).*sin(t) sin(8*t)]+...
            noiseSigma*randn(N,3);
        C = t;
        name = 'Toroidal Helix ';

    case 7 % Gaussian randomly sampled
        X = alpha * randn(N,3);
        % Third coordinate is the Gaussian density evaluated at (x,y).
        X(:,3) = 1 / (alpha^2 * 2 * pi) *...
            exp( (-X(:,1).^2 - X(:,2).^2) / (2*alpha^2) );
        C = X(:,3);
        name = 'Gaussian example';

    case 8 % Uniform spaced ellipse
        % Integer range scaled afterwards: always exactly N angles, which a
        % fractional-step colon range does not guarantee.
        theta = (0:N-1)'*(2*pi/N);
        X = [cos(theta) sin(theta)];
        C = theta;
        name = 'Uniform spaced ellipse';

    case 9 % Uniform random ellipse
        theta = 2*pi*rand(N,1);
        X = [cos(theta) sin(theta)];
        C = theta;
        name = 'Uniform random ellipse';

    case 10 % Barbell
        % Rejection sampling: C is 0 for the left disc, 1 for the bar and
        % 2 for the right disc.
        X = zeros(N,2);
        C = zeros(N,1);
        k = 1;
        while k <= N
            x = (2+alpha/2)*rand;
            y = (2+alpha/2)*rand;
            if (x-.5)^2+(y-.5)^2 <= .25
                label = 0;
            elseif abs(x-1-alpha/4) < alpha/4 && abs(y-.5) < .125
                label = 1;
            elseif (x-1.5-alpha/2)^2+(y-.5)^2 <= .25
                label = 2;
            else
                continue % outside the barbell: draw again
            end
            X(k,:) = [x,y];
            C(k,1) = label;
            k = k+1;
        end
        name = 'Barbell';
end