% clc
% clear all
%
% Import the raw data from the Excel workbook.
% NOTE(review): 'D:M' reads as an Excel column range (10 columns), while later
% sections of this script index Data up to column 29 — confirm the intended range.
Data = xlsread('F:\实验数据\数据汇总表111','D:M');
% Figure 1: bar chart of the first 15 elements of Data, labelled z1..z15.
% NOTE(review): the title calls these missing-value percentages, so Data is
% presumably a 29-element summary vector when this section runs — confirm.
figure(1)
% Terminated with ';' so the label list is not echoed to the Command Window.
x3str={'z1','z2','z3','z4','z5','z6','z7','z8','z9','z10','z11','z12','z13','z14','z15'};
bar(Data(1:15))
xtextp=1:15; % x position of each label: one per bar, same as the bar positions
ytextp=-0.1*0.18*ones(1,length(xtextp)); % common y position (-0.018), below the lower y-limit of 0
text(xtextp-0.3,ytextp,x3str) % labels are shifted 0.3 to the left of each bar position
set(gca,'XTickLabel',[]); % remove the default numeric tick labels (1,2,3,...)
title('变量z1-z15空值百分比图','fontsize',12)
ylim([0,0.85])
% Figure 2: bar chart of elements 16..29 of Data, labelled z16..z27, y1, y2.
% NOTE(review): the title calls these missing-value percentages, so Data is
% presumably a 29-element summary vector when this section runs — confirm.
figure(2)
x3str={'z16','z17','z18','z19','z20','z21','z22','z23','z24','z25','z26','z27','y1','y2'};
bar(Data(16:29))
xtextp=1:14; % x position of each label: one per bar, same as the bar positions
ytextp=-0.1*0.04*ones(1,length(xtextp)); % common y position (-0.004) for all labels
text(xtextp-0.3,ytextp,x3str) % labels are shifted 0.3 to the left of each bar position
set(gca,'XTickLabel',[]); % remove the default numeric tick labels (1,2,3,...)
% Title corrected from "z11-z27": this figure shows z16..z27 plus y1 and y2.
title('变量z16-z27、y1、y2空值百分比图','fontsize',12)
%
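% [M N] = size(Data); % number of rows and columns of Data
% % Fill in missing values.
% % A missing value is assumed to equal the value in the previous row, i.e. each
% % time period is measured once and an unmeasured period is taken to match the most recent earlier measurement.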
% for i = 2:M
% for j = 1:N
%
% AA = isnan(Data);
%
% if AA(i,j) == 1
%
% Data(i,j) = Data(i-1,j);
%
% else
%
% Data(i,j) = Data(i,j);
%
% end
% end
% end
%
%
% % 删除数据中的空值
% [U V] = size(Data);
% BB = isnan(Data);
% [u,v] = find(BB()==1);
% Data(u,:) = [];
% Box plots of the raw variables, one figure per variable group.
% Figures 3-5 are used (instead of 1-3) so that a top-to-bottom run of the
% script does not overwrite the bar charts already drawn in figures 1 and 2.
% Group labels are passed through boxplot's 'Labels' option rather than by
% overriding the axes' XTickLabel after the plot has been drawn.
figure(3);boxplot(Data(:,1:10),'Labels',{'z1','z2','z3','z4','z5','z6','z7','z8','z9','z10'})
title('变量z1-z10箱体图','fontsize',12)
figure(4);boxplot(Data(:,11:27),'Labels',{'z11','z12','z13','z14','z15','z16','z17','z18','z19','z20','z21','z22','z23','z24','z25','z26','z27'})
title('变量z11-z27箱体图','fontsize',12)
figure(5);boxplot(Data(:,28:29),'Labels',{'y1','y2'})
title('变量y1,y2箱体图','fontsize',12)
%% Remove outliers using Chauvenet's criterion (equal confidence probability)
% An entry is flagged when it deviates from its column mean by more than
% w standard deviations; every row containing a flagged entry is deleted.
[m n] = size(Data);
Y = zeros(m,n);      % Y(r,c) == 1 marks Data(r,c) as an outlier (preallocated)
w = 1 + 0.4*log(m);  % Chauvenet coefficient (approximate formula)
for i = 1:n
    x = Data(:,i);
    % mean/std return NaN as soon as x contains a single NaN, which would make
    % every comparison below false and silently skip this column. Compute the
    % statistics over the non-missing entries only.
    valid = ~isnan(x);
    mu = mean(x(valid));
    sigma = std(x(valid));
    YiChang = abs(x-mu) > w*sigma;  % NaN entries compare false, so they are never flagged
    % Alternative 3-sigma rule:
    %YiChang = x > mu+3*sigma | x < mu-3*sigma;
    %Std(:,i) = sigma;
    %Mean(:,i) = mu;
    Y(:,i) = YiChang;
end
[u v] = find(Y == 1);  % rows and columns of the flagged entries
uu = unique(u);        % collapse repeated row indices
Data(uu,:) = [ ];      % delete every row that contains an outlier