UNIVARIATE 过程可以检验一个变量是否服从正态分布,具体的用法如下:
proc univariate data =数据集;
var 变量名;
run;
例子
输入数据
/* Sample data for the normality test: stock code, company name, and EPS. */
data sasuser.stock;
    /* List input gives character variables a default length of 8 bytes.
       In a UTF-8 session a four-character Chinese name needs up to 12 bytes,
       so NAME is declared wide enough to avoid truncation. CODE is declared
       first so the variable order (CODE, NAME, EPS) stays the same. */
    length CODE $ 8 NAME $ 24;
    input CODE $ NAME $ EPS @@;
cards;
000096 广聚能源 0.059
000099 中信海直 0.028
000150 ST麦科特 -0.003
000151 中成股份 0.026
000153 新力药业 0.056
000155 川化股份 -0.009
000156 安塑股份 0.033
000157 中联重科 0.06
000158 常山股份 0.018
000159 国际实业 0.008
000301 丝绸股份 0.04
000488 晨鸣纸业 0.101
000725 京东方 0.044
000835 隆源实业 0.07
000869 张裕 0.194
000877 天山股份 -0.084
000885 ST春都 -0.073
000890 法尔胜 0.031
000892 长丰通信 0.031
000897 津滨发展 0.002
000900 现代投资 0.058
000901 航天科技 0.005
000902 中国服装 -0.031
000903 云内动力 0.109
000905 厦门路桥 0.046
000906 南方建材 0.007
000908 天一科技 0.006
000909 数源科技 0.002
000910 大亚股份 0.036
000911 南宁糖业 0.067
000912 泸天化 0.112
000913 钱江摩托 0.062
000915 山大华特 0.001
000916 华北高速 0.038
000917 电广传媒 0.086
000918 亚华种业 -0.045
;
run;
正态性检验
/* Summary statistics for eps; the NORMAL option adds the tests for normality. */
proc univariate normal data=sasuser.stock;
    var eps;
run;
绘制直方图和概率分布图
/* Same analysis as the plain normality test, plus a histogram and a
   probability plot of eps. */
proc univariate normal data=sasuser.stock;
    var eps;
    histogram eps;
    probplot eps;
run;
概率分布图画出的是变量分布函数的图形:横轴是分位数,范围从 0 到 100;竖轴是变量的值。
T检验一般格式
proc ttest data=数据集 h0=均值;
var 检验变量;
run;
接上例
/* One-sample t test: is the mean of eps equal to 0.03? */
proc ttest h0=0.03 data=sasuser.stock;
    var eps;
run;
/* One-sample t test: is the mean of eps equal to 0.5? */
proc ttest h0=0.5 data=sasuser.stock;
    var eps;
run;
两样本均值T检验:
proc ttest data=数据集;
class 分类变量;
var 检验变量;
run;
例子
/* Math scores for two groups of students.
   sex is coded 'F' (upper case) and 'm' (lower case) in the data lines. */
data sasuser.score;
    input name $ sex $ math @@;
datalines;
Alice F 90
Jenny F 93
Kate F 97
Bennie F 82
Hellen F 85
Wincelet F 90
Chris F 89
Janet F 86
Tom m 95
Mike m 80
Fred m 84
Alex m 92
Cook m 75
Butt m 77
Geoge m 86
Tod m 89
;
run;
/* Two-sample t test: compare mean math score between the two sex groups. */
proc ttest data=sasuser.score;
    var math;
    class sex;
run;
/* Two-sample t test with a non-zero null value: is the difference between
   the group means equal to -3? */
proc ttest h0=-3 data=sasuser.score;
    var math;
    class sex;
run;
注意:t检验要求两个独立样本都服从正态分布;如果不是正态分布,可以采用非参方法
/* Chinese-language scores by sex ('f' / 'm'); used for the nonparametric tests. */
data sasuser.scorec;
    input name $ sex $ chinese;
datalines;
Alice f 85
Jenny f 90
Kate f 83
Bennie f 79
Hellen f 74
Wincelet f 82
Chris f 84
Janet f 65
Tom m 87
Mike m 85
Fred m 85
Alex m 90
Cook m 78
Butt m 81
Geoge m 85
Tod m 84
;
run;
/* Nonparametric comparison of the two groups using Wilcoxon scores. */
proc npar1way wilcoxon data=sasuser.scorec;
    var chinese;
    class sex;
run;
输出:第一部分是 Wilcoxon 秩和统计量,第二部分是秩和检验结果,第三部分是 Kruskal-Wallis 检验结果。
/* MEDIAN option: median test, based on how many observations in each
   sample lie above the overall median. */
proc npar1way median data=sasuser.scorec;
    var chinese;
    class sex;
run;
输出,第一部分是相关统计量,第二部分是两样本检验结果,第三部分是单因素检验结果
例子
/* Paired measurements: one row per unit with an "old" and a "new" value. */
data sasuser.scales;
    input no $ old new;
datalines;
1 125 150
2 265 233
3 337 386
4 59 67
5 69 60
6 456 400
7 205 255
8 302 260
9 100 120
10 50 50
11 92 89
12 84 99
13 63 63
14 49 50
15 163 180
16 277 290
17 25 60
18 360 350
19 52 65
20 88 96
;
run;
/* Compute the paired difference (new - old) and keep only that column. */
data minus (keep=add);
    set sasuser.scales;
    add = new - old;
run;
t检验
/* One-sample t test on the paired differences (default null value of 0). */
proc ttest data=minus;
    var add;
run;
PROC REG DATA=输入数据集 选项;
VAR 变量列表;
MODEL 因变量=自变量列表;
PRINT 输出结果;
PLOT 诊断图形;
RUN;
例子
/* Regression example data: stock code, scale, EPS, and price.
   The second data column holds the large integer values (e.g. 8500) and the
   third holds the small decimals (e.g. 0.059), so the INPUT statement must
   read scale before eps. The original order (eps scale) assigned each column
   to the wrong variable name. */
data sasuser.stock;
    input label $ scale eps price;
cards;
000096 8500 0.059 13.27
000099 6000 0.028 14.2
000150 12600 -0.003 7.12
000151 10500 0.026 10.08
000153 2500 0.056 22.75
000155 13000 -0.009 6.85
000156 3600 0.033 14.95
000157 10000 0.06 12.65
000158 10000 0.018 8.38
000159 7000 0.008 12.15
000301 15365 0.04 7.31
000488 7700 0.101 13.26
000725 6000 0.044 12.33
000835 1338 0.07 22.58
000869 3200 0.194 18.29
000877 7800 -0.084 12.55
000885 6000 -0.073 12.48
000890 16934 0.031 9.12
000892 12000 0.031 7.88
000897 14166 0.002 6.91
000900 21423 0.058 8.59
000901 4800 0.005 27.95
000902 6500 -0.031 10.92
000903 6000 0.109 11.79
000905 9500 0.046 9.29
000906 6650 0.007 14.47
000908 8988 0.006 8.28
000909 6000 0.002 9.99
000910 8000 0.036 8.9
000911 7280 0.067 9.01
000912 15000 0.112 8.06
000913 8450 0.062 11.86
000915 4599 0.001 14.4
000916 34000 0.038 5.15
000917 11800 0.086 16.23
000918 6000 -0.045 10.12
;
run;
/* Linear regression of price on eps and scale. */
proc reg data=sasuser.stock;
    var eps scale price;
    model price = eps scale;
run;
/* Refit with scale as the only regressor. The original note says eps was
   not significant in the previous fit. */
proc reg data=sasuser.stock;
    var scale price;
    model price = scale;
run;
在 MODEL 语句中加上 SELECTION=优化方法,就可以自动挑选变量。可供选择的优化方法有:NONE(全用)、FORWARD(逐步引入法)、BACKWARD(逐步消去法)、STEPWISE(智能消除法,即引入与剔除相结合的逐步回归法)。
/* Stepwise regression: SELECTION=STEPWISE chooses the regressors automatically. */
proc reg data=sasuser.stock;
    var scale price eps;
    model price = scale eps / selection=stepwise;
    /* PRINT CLI outputs the predicted values with their 95% confidence limits. */
    print cli;
run;
例子
/* Nonlinear regression.
   Assumed model: price = b0*eps + b1*scale + b2*eps*scale + b3 */
proc nlin data=sasuser.stock;
    model price = b0*eps + b1*scale + b2*eps*scale + b3;
    /* Starting values for the iterative fit. */
    parms b0=1 b1=1 b2=1 b3=10;
run;
/* Population by census year (time = year, pop = population count).
   The 1920 value was entered as 10571, a dropped digit that broke an otherwise
   increasing series (91972 in 1910, 122775 in 1930); corrected to 105710.
   NOTE(review): the 1880 value 50115 may also be a typo. The commonly
   published figure is 50155; confirm against the original source. */
data sasuser.people;
    input time pop;
cards;
1790 3929
1800 5308
1810 7239
1820 9638
1830 12866
1840 17069
1850 23191
1860 31443
1870 39818
1880 50115
1890 62947
1900 75994
1910 91972
1920 105710
1930 122775
1940 131669
1950 151325
1960 179323
1970 203211
1980 226542
;
run;
/* Exponential growth fit: pop = c0 * exp(c1 * (time - 1970)).
   Because the exponent is 0 at time = 1970, c0 is the fitted population in
   1970 and c1 is the growth rate per year. The starting values are chosen on
   the scale of the data: the 1970 observation is 203211, and
   log(203211 / 3929) / 180 is about 0.022. The original values
   (c0=3.9, c1=0.222) were orders of magnitude away from the data, which
   makes convergence of the iterative fit unreliable. */
proc nlin data=sasuser.people;
    parms c0=200000 c1=0.022;
    model pop = c0*exp(c1*(time-1970));
run;
/* 3. The GLM procedure */
/* Sample data: one (rate, price) pair per line. */
data sasuser.bond;
    input rate price;
datalines;
0.01 127.6
0.48 124.0
0.71 110.8
0.95 103.9
1.19 101.5
0.01 130.1
0.48 122.0
1.44 92.3
0.71 113.1
1.96 83.7
0.01 128
1.44 91.4
1.96 86.2
;
run;
/* Scatter plot of price against rate with blue symbols.
   No DATA= option is given, so the most recently created data set is plotted. */
symbol1 color=blue;
proc gplot;
    plot price*rate / vminor=1 cframe=ligr;
run;
/* Improvement: fit a linear model of price on rate with PROC GLM.
   No DATA= option is given, so the most recently created data set is used. */
proc glm;
    model price = rate;
run;
病态数据拟合过程
/* Data for the ill-conditioned regression example: Employment with six
   candidate regressors, one row per year from 1947 to 1962. */
data sasuser.longley;
    input Employment Prices GNP Jobless Military PopSize Year;
datalines;
60323 83.0 234289 2356 1590 107608 1947
61122 88.5 259426 2325 1456 108632 1948
60171 88.2 258054 3682 1616 109773 1949
61187 89.5 284599 3351 1650 110929 1950
63221 96.2 328975 2099 3099 112075 1951
63639 98.1 346999 1932 3594 113270 1952
64989 99.0 365385 1870 3547 115094 1953
63761 100.0 363112 3578 3350 116219 1954
66019 101.2 397469 2904 3048 117388 1955
67857 104.6 419180 2822 2857 118734 1956
68169 108.4 442769 2936 2798 120445 1957
66513 110.8 444546 4681 2637 121950 1958
68655 112.6 482704 3813 2552 123366 1959
69564 114.2 502601 3931 2514 125368 1960
69331 115.7 518173 4806 2572 127852 1961
70551 116.9 554894 4007 2827 130081 1962
;
run;
/* Fit Employment on each regressor and its square with PROC ORTHOREG. */
proc orthoreg data=sasuser.longley;
    model Employment = Prices   Prices*Prices
                       GNP      GNP*GNP
                       Jobless  Jobless*Jobless
                       Military Military*Military
                       PopSize  PopSize*PopSize
                       Year     Year*Year;
run;
/* The same quadratic model fitted with PROC GLM, for comparison with the
   ORTHOREG fit. */
proc glm data=sasuser.longley;
    model Employment = Prices   Prices*Prices
                       GNP      GNP*GNP
                       Jobless  Jobless*Jobless
                       Military Military*Military
                       PopSize  PopSize*PopSize
                       Year     Year*Year;
run;
ANOVA过程一般用法
proc anova data =数据集名称;
class 因素;
model 实验结果=因素;
run;
例子
/* One-way layout: a Nitrogen measurement for each Brand, five rows per brand. */
data sasuser.xiaomai;
    input Brand $ Nitrogen @@;
datalines;
3DOK1 19.4
3DOK1 32.6
3DOK1 27.0
3DOK1 32.1
3DOK1 33.0
3DOK5 17.7
3DOK5 24.8
3DOK5 27.9
3DOK5 25.2
3DOK5 24.3
3DOK4 17.0
3DOK4 19.4
3DOK4 9.1
3DOK4 11.9
3DOK4 15.8
3DOK7 20.7
3DOK7 21.0
3DOK7 20.5
3DOK7 18.8
3DOK7 18.6
3DOK13 14.3
3DOK13 14.4
3DOK13 11.8
3DOK13 11.6
3DOK13 14.2
COMPOS 17.3
COMPOS 19.4
COMPOS 19.1
COMPOS 16.9
COMPOS 20.8
;
run;
/* The same brand/nitrogen values in a compact layout: the trailing @@ holds
   the input line so several (brand, Nitrogen) pairs are read from each line. */
data test;
    input brand $ Nitrogen @@;
cards;
3DOK1 19.4 3DOK1 32.6 3DOK1 27.0 3DOK1 32.1 3DOK1 33.0
3DOK5 17.7 3DOK5 24.8 3DOK5 27.9 3DOK5 25.2 3DOK5 24.3
3DOK4 17.0 3DOK4 19.4 3DOK4 9.1 3DOK4 11.9 3DOK4 15.8
3DOK7 20.7 3DOK7 21.0 3DOK7 20.5 3DOK7 18.8 3DOK7 18.6
3DOK13 14.3 3DOK13 14.4 3DOK13 11.8 3DOK13 11.6 3DOK13 14.2
COMPOS 17.3 COMPOS 19.4 COMPOS 19.1 COMPOS 16.9 COMPOS 20.8
;
/* One-way ANOVA of Nitrogen by brand.
   No DATA= option is given, so the most recently created data set is used. */
proc anova;
    class brand;
    model Nitrogen = brand;
run;
anova过程添加means语句,可以比较因素取值之间的差异
/* One-way ANOVA of Nitrogen by brand, with a MEANS statement for brand. */
proc anova;
    class brand;
    model Nitrogen = brand;
    means brand;
run;
/* One-way ANOVA of Nitrogen by brand, followed by further MEANS statements
   submitted after the first RUN while the procedure is still active. */
proc anova;
    class brand;
    model Nitrogen = brand;
    means brand;
run;
    /* Additional requests, one multiple-comparison option per run group. */
    means brand / t;
run;
    means brand / bon;
run;
    means brand / regwq;
run;
    means brand / tukey;
run;
GLM也可以进行单因素方差检验
proc glm data=数据集;
class 因素;
model 实验结果=因素;
run;
/* The same one-way model of Nitrogen by brand, fitted with PROC GLM.
   No DATA= option is given, so the most recently created data set is used. */
proc glm;
    class brand;
    model Nitrogen = brand;
run;
/* Relief measured for each combination of PainLevel (1-8), Codeine (1/2),
   and Acupuncture (1/2). The trailing @@ reads four observations per line. */
data PainRelief;
    input PainLevel Codeine Acupuncture Relief @@;
cards;
1 1 1 0.0 1 2 1 0.5 1 1 2 0.6 1 2 2 1.2
2 1 1 0.3 2 2 1 0.6 2 1 2 0.7 2 2 2 1.3
3 1 1 0.4 3 2 1 0.8 3 1 2 0.8 3 2 2 1.6
4 1 1 0.4 4 2 1 0.7 4 1 2 0.9 4 2 2 1.5
5 1 1 0.6 5 2 1 1.0 5 1 2 1.5 5 2 2 1.9
6 1 1 0.9 6 2 1 1.4 6 1 2 1.6 6 2 2 2.3
7 1 1 1.0 7 2 1 1.8 7 1 2 1.7 7 2 2 2.1
8 1 1 1.2 8 2 1 1.7 8 1 2 1.6 8 2 2 2.4
;
run;
/* ANOVA of Relief with the Codeine-by-Acupuncture interaction: the bar
   operator (|) expands to both main effects plus their interaction.
   No DATA= option is given, so the most recently created data set is used. */
proc anova;
    class PainLevel Codeine Acupuncture;
    model Relief = PainLevel Codeine|Acupuncture;
run;
/* Main-effects-only ANOVA of Relief (no interaction term). */
proc anova data=painrelief;
    class PainLevel Codeine Acupuncture;
    model Relief = PainLevel Codeine Acupuncture;
run;
    /* Level means for each factor, to compare the size of their effects. */
    means painlevel codeine acupuncture;
run;
例子
/* Six two-level factors A-F are read as single digits from columns 1-6
   (column input); P is then read with list input from the rest of the line.
   The data lines must stay unindented because of the fixed columns.
   y is the arcsine square-root transform of P/100. */
data heart;
    input A 1 B 2 C 3 D 4 E 5 F 6 P;
    y = arsin(sqrt(P/100));
datalines;
111111 68
111222 60
122112 45
122221 76
212122 45
212211 64
221121 80
221212 65
;
run;
/* Main-effects ANOVA of y on all six factors.
   No DATA= option is given, so the most recently created data set is used. */
proc anova;
    class A B C D E F;
    model y = A B C D E F;
run;
/* Main-effects ANOVA of y with factor A left out, followed by the level
   means of the remaining factors. */
proc anova;
    class B C D E F;
    model y = B C D E F;
run;
    means B C D E F;
run;
例子
/* Two-way layout with unequal cell counts; the trailing @@ reads several
   (Treatment, Block, y) triples from each line. */
data twoway;
    input Treatment Block y @@;
cards;
1 1 17 1 1 28 1 1 19 1 1 21 1 1 19
1 2 43 1 2 30 1 2 39 1 2 44 1 2 44
1 3 16
2 1 21 2 1 21 2 1 24 2 1 25
2 2 39 2 2 45 2 2 42 2 2 47
2 3 19 2 3 22 2 3 16
3 1 22 3 1 30 3 1 33 3 1 31
3 2 46
3 3 26 3 3 31 3 3 26 3 3 33 3 3 29 3 3 25
;
/* Two-way model with interaction: Treatment|Block expands to both main
   effects plus Treatment*Block. */
proc glm data=twoway;
    class Treatment Block;
    model y = Treatment|Block;
run;
/* Two-way main-effects model, with level means for both factors. */
proc glm data=twoway;
    class Treatment Block;
    model y = Treatment Block;
    means Treatment Block;
run;
一般用法
proc freq data=数据集;
table 因素A*因素B/chisq;
weight 实验结果;
run;
注意:卡方检验要求每个单元的期望频数不小于 5,否则要用 Fisher 精确检验。
/* Contingency-table test.
   Builds a 2x2 table in long form: the nested loops generate the cell
   indices (a, b) and each pass reads one cell frequency f from the data. */
data heart;
    do a = 1 to 2;
        do b = 1 to 2;
            input f @@;
            output;   /* write one observation per cell */
        end;
    end;
datalines;
243 185
156 278
;
run;
/* Chi-square test on the a-by-b table; WEIGHT f makes each row count as f
   observations. No DATA= option is given, so the most recently created
   data set is used. */
proc freq;
    weight f;
    table a*b / chisq;
run;
/* 2x2 table in long form: the loops generate the sex and mark indices and
   each pass reads one cell frequency f. */
data score;
    do sex = 1 to 2;
        do mark = 1 to 2;
            input f @@;
            output;
        end;
    end;
datalines;
14 16
18 17
;
run;
/* Chi-square test on the sex-by-mark table. Percentages are suppressed and
   expected cell frequencies are printed. No DATA= option is given, so the
   most recently created data set is used. */
proc freq;
    tables sex*mark / nopct norow nocol chisq expected;
    weight f;
run;
/* 3x2 table in long form: skin (1-3), degree (0/1), and the cell count num. */
data white;
    input skin degree num;
datalines;
1 0 126
1 1 165
2 0 236
2 1 465
3 0 65
3 1 231
;
run;
/* Chi-square test plus measures of association for the skin-by-degree table,
   weighted by num. No DATA= option is given, so the most recently created
   data set is used. */
proc freq;
    table skin*degree / measures nopct norow nocol chisq expected;
    weight num;
run;