R语言实战--源码分析
# Worked example: combine three exam scores that are on different scales into
# one standardized score per student, then assign letter grades by quintile.

# Print numbers with 2 significant digits so the output is easier to read.
options(digits = 2)

Student <- c("John Davis", "Angela williams", "Bullwinkle Moose", "David Homes",
             "Janice Markhammer", "Chery Cushing",
             "Reuven Ytzrhak", "Greg xnox", "Hoel England",
             "Mary Rayburn")
Math <- c(502, 600, 412, 358, 495, 512, 410, 625, 573, 522)
Science <- c(95, 99, 80, 82, 75, 85, 82, 95, 89, 86)
English <- c(25, 22, 18, 15, 20, 18, 15, 30, 27, 18)

# stringsAsFactors = FALSE keeps the student names as character strings
# instead of converting them to a factor.
roster <- data.frame(Student, Math, Science, English, stringsAsFactors = FALSE)

# scale() standardizes each score column (centered and scaled), so the three
# exams become comparable despite their different ranges.
z <- scale(roster[, 2:4])

# Composite score: the row-wise mean of the standardized scores.
score <- apply(z, 1, mean)

# rbind() appends rows; cbind() appends columns. Here the composite score is
# added as a new column.
roster <- cbind(roster, score)
roster

# Cutoffs at the 80th, 60th, 40th and 20th percentiles of the composite score
# (about 0.82, 0.17, -0.37 and -0.85 for this data).
y <- quantile(score, c(.8, .6, .4, .2))
y

# Add a new column `grade` to roster using logical indexing on the cutoffs.
# Each grade covers a half-open band [lower cutoff, upper cutoff), so every
# student falls into exactly one band.
roster$grade[score >= y[1]] <- "A"
roster$grade[score < y[1] & score >= y[2]] <- "B"
roster$grade[score < y[2] & score >= y[3]] <- "C"
roster$grade[score < y[3] & score >= y[4]] <- "D"
roster$grade[score < y[4]] <- "F"

# ---- Console transcript of the same session follows ----
> options(digits = 2)
> Student<-c("John Davis","Angela williams","Bullwinkle Moose","David Homes",
+ "Janice Markhammer","Chery Cushing",
+ "Reuven Ytzrhak","Greg xnox","Hoel England",
+ "Mary Rayburn")
> Math<-c(502,600,412,358,495,512,410,625,573,522)
> Science<-c(95,99,80,82,75,85,82,95,89,86)
> English<-c(25,22,18,15,20,18,15,30,27,18)
> #stringsAsFactors = FALSE说明这个字符串不能做因子
> roster<-data.frame(Student,Math,Science,English,stringsAsFactors = FALSE)
> roster
Student Math Science English
1 John Davis 502 95 25
2 Angela williams 600 99 22
3 Bullwinkle Moose 412 80 18
4 David Homes 358 82 15
5 Janice Markhammer 495 75 20
6 Chery Cushing 512 85 18
7 Reuven Ytzrhak 410 82 15
8 Greg xnox 625 95 30
9 Hoel England 573 89 27
10 Mary Rayburn 522 86 18
> #做标量化scale()可以将变量进行标准化
> z<-scale(roster[,2:4])
> z
Math Science English
[1,] 0.013 1.08 0.82
[2,] 1.143 1.60 0.24
[3,] -1.026 -0.89 -0.55
[4,] -1.649 -0.63 -1.14
[5,] -0.068 -1.55 -0.16
[6,] 0.128 -0.24 -0.55
[7,] -1.049 -0.63 -1.14
[8,] 1.432 1.08 1.81
[9,] 0.832 0.29 1.22
[10,] 0.243 -0.10 -0.55
attr(,"scaled:center")
Math Science English
501 87 21
attr(,"scaled:scale")
Math Science English
86.7 7.6 5.1
> #得到综合的平均分
> score<-apply(z,1,mean)
> score
[1] 0.64 0.99 -0.82 -1.14 -0.59 -0.22 -0.94 1.44 0.78 -0.14
> #rbind将记录进行合并;cbind将列进行合并
> roster<-cbind(roster,score)
> roster
Student Math Science English score
1 John Davis 502 95 25 0.64
2 Angela williams 600 99 22 0.99
3 Bullwinkle Moose 412 80 18 -0.82
4 David Homes 358 82 15 -1.14
5 Janice Markhammer 495 75 20 -0.59
6 Chery Cushing 512 85 18 -0.22
7 Reuven Ytzrhak 410 82 15 -0.94
8 Greg xnox 625 95 30 1.44
9 Hoel England 573 89 27 0.78
10 Mary Rayburn 522 86 18 -0.14
> #给出学生成绩百分数的划分
> y<-quantile(score,c(.8,.6,.4,.2))
> y
80% 60% 40% 20%
0.82 0.17 -0.37 -0.85
> #0.82是综合得分的第80百分位分界值,0.17是第60百分位分界值
> #按照这个级别设置逻辑运算符
> #在roster数据框中加入一个新的列grade
> roster$grade[score>=y[1]]<-"A"
> roster$grade[score<y[1] & score>=y[2]]<-"B"
> roster$grade[score<y[2] & score>=y[3]]<-"C"
> roster$grade[score<y[3] & score>=y[4]]<-"D"
> roster$grade[score<y[4]]<-"F"
> roster
Student Math Science English score grade
1 John Davis 502 95 25 0.64 B
2 Angela williams 600 99 22 0.99 A
3 Bullwinkle Moose 412 80 18 -0.82 D
4 David Homes 358 82 15 -1.14 F
5 Janice Markhammer 495 75 20 -0.59 D
6 Chery Cushing 512 85 18 -0.22 C
7 Reuven Ytzrhak 410 82 15 -0.94 F
8 Greg xnox 625 95 30 1.44 A
9 Hoel England 573 89 27 0.78 B
10 Mary Rayburn 522 86 18 -0.14 C
> #把学生姓名做拆分:按firstname和lastname进行拆分
> name<-strsplit((roster$Student)," ")
> name
[[1]]
[1] "John" "Davis"
[[2]]
[1] "Angela" "williams"
[[3]]
[1] "Bullwinkle" "Moose"
[[4]]
[1] "David" "Homes"
[[5]]
[1] "Janice" "Markhammer"
[[6]]
[1] "Chery" "Cushing"
[[7]]
[1] "Reuven" "Ytzrhak"
[[8]]
[1] "Greg" "xnox"
[[9]]
[1] "Hoel" "England"
[[10]]
[1] "Mary" "Rayburn"
> #重新置变量名
> #sapply()函数;这个函数提取列表中每个成分的第一个元素,
> #放入一个存储名字的向量中;然后提取每个成分的第二个元素,
> #放入姓氏的向量中;
> #“[”表示提取某个对象一部分的函数,在这里它是提取name中的第一个和第二个元素的
> lastname<-sapply(name,"[",2)
> lastname
[1] "Davis" "williams" "Moose" "Homes" "Markhammer"
[6] "Cushing" "Ytzrhak" "xnox" "England" "Rayburn"
> firstname<-sapply(name,"[",1)
> #[,-1]:表示行不变,第一列(姓名)删除
> roster<-cbind(firstname,lastname,roster[,-1])
> roster
firstname lastname Math Science English score grade
1 John Davis 502 95 25 0.64 B
2 Angela williams 600 99 22 0.99 A
3 Bullwinkle Moose 412 80 18 -0.82 D
4 David Homes 358 82 15 -1.14 F
5 Janice Markhammer 495 75 20 -0.59 D
6 Chery Cushing 512 85 18 -0.22 C
7 Reuven Ytzrhak 410 82 15 -0.94 F
8 Greg xnox 625 95 30 1.44 A
9 Hoel England 573 89 27 0.78 B
10 Mary Rayburn 522 86 18 -0.14 C
> #进行排序:第一排序为姓,第二排序为名。且是按行来做的
> roster<-roster[order(lastname,firstname),]
> roster
firstname lastname Math Science English score grade
6 Chery Cushing 512 85 18 -0.22 C
1 John Davis 502 95 25 0.64 B
9 Hoel England 573 89 27 0.78 B
4 David Homes 358 82 15 -1.14 F
5 Janice Markhammer 495 75 20 -0.59 D
3 Bullwinkle Moose 412 80 18 -0.82 D
10 Mary Rayburn 522 86 18 -0.14 C
2 Angela williams 600 99 22 0.99 A
8 Greg xnox 625 95 30 1.44 A
7 Reuven Ytzrhak 410 82 15 -0.94 F