function [X,Y] = led(seed,N)
%LED Noisy LED display problem.
%   [X,Y] = LED(SEED) produces a sample of 200 (X,Y) pairs, made of
%   24 covariates X(i,:) and one explained variable Y(i).
%
%   X(:,1:7) correspond to the seven segments of a LED display, coded
%   0.5 if on and -0.5 if off. Each segment has a 10% chance of being
%   wrong, independently of the state of the other ones.
%   X(:,8:24) are independent Bernoulli variables with p=0.5, also
%   coded -0.5/0.5.
%   Y in {1,...,9,10} is the represented digit {1,...,9,0}.
%
%   SEED is an optional parameter selecting the seed of the
%   pseudo-random generators (the draws themselves are uniform, via
%   RAND). When SEED is omitted or empty, a clock-based seed is used.
%
%   [X,Y] = LED(SEED,N) produces a sample of size N. When N is omitted
%   or empty, N = 200. If N is not a multiple of 10 it is increased to
%   the next multiple of 10 and a warning is issued. The sample is
%   ordered by class: the first N/10 rows are digit 1, the next N/10
%   rows are digit 2, and so on.
%
%   Segment numbering:
%
%            1
%         -------
%        |       |
%      6 |       | 2
%        |   7   |
%         -------
%        |       |
%      5 |       | 3
%        |       |
%         -------
%            4
%
%   Source: Breiman, L., Friedman, J. H., Olshen, R. and Stone, C. J.
%   Classification and Regression Trees, Wadsworth 1984.

% 20/06/01 Y. Grandvalet

% --------------------------------------------------
% Check number of input arguments - Set default
% --------------------------------------------------

if nargin < 2 || isempty(N)
    N = 200;                    % default sample size
end
if nargin < 1 || isempty(seed)
    seed = sum(100*clock);      % clock-based default seed
end

% The ten classes are balanced, so the sample size has to be a multiple
% of 10: round it up when necessary.
remainder = rem(N,10);
if remainder ~= 0
    N = N + 10 - remainder;
    warning(['sample size turned to ' num2str(N)]);
end

% Random number initializations. The legacy 'seed' syntax is kept on
% purpose so that the sample generated for a given SEED stays the same.
randn('seed',seed)
rand('seed',seed)

Nk = round(N/10);               % number of examples per digit

% Segment patterns (columns = segments 1..7); rows are the digits
% 1,2,...,9,0 in that order.
LED = [0 1 1 0 0 0 0;
       1 1 0 1 1 0 1;
       1 1 1 1 0 0 1;
       0 1 1 0 0 1 1;
       1 0 1 1 0 1 1;
       1 0 1 1 1 1 1;
       1 1 1 0 0 0 0;
       1 1 1 1 1 1 1;
       1 1 1 0 0 1 1;
       1 1 1 1 1 1 0];

% relevant variables: Nk consecutive copies of each digit pattern
X = zeros(N,24);
Y = kron((1:10)', ones(Nk,1));
X(:,1:7) = LED(Y,:);

% each segment is flipped with probability 0.1
noise = (rand(N,7) < 0.1);
X(:,1:7) = double(xor(X(:,1:7), noise));

% irrelevant variables
X(:,8:24) = (rand(N,17) < 0.5);

% inputs with values -.5 .5
X = X - 0.5;