R语言实战--源码分析

R语言实战--源码分析

 

# Worked example: combine three exam scores into one standardized composite
# score, assign letter grades by quintile, split student names into first and
# last name, and sort the roster by name.

# Print numbers with 2 significant digits so the output is easier to read.
options(digits = 2)

Student <- c(
  "John Davis", "Angela williams", "Bullwinkle Moose", "David Homes",
  "Janice Markhammer", "Chery Cushing",
  "Reuven Ytzrhak", "Greg xnox", "Hoel England",
  "Mary Rayburn"
)
Math <- c(502, 600, 412, 358, 495, 512, 410, 625, 573, 522)
Science <- c(95, 99, 80, 82, 75, 85, 82, 95, 89, 86)
English <- c(25, 22, 18, 15, 20, 18, 15, 30, 27, 18)

# stringsAsFactors = FALSE keeps Student as character strings, not a factor.
roster <- data.frame(Student, Math, Science, English, stringsAsFactors = FALSE)

# The three exams are on different scales, so standardize each column with
# scale() before combining them.
z <- scale(roster[, 2:4])

# Composite score: the row-wise mean of the standardized scores.
score <- apply(z, 1, mean)

# cbind() appends columns (rbind() would append rows).
roster <- cbind(roster, score)
roster

# Cut points for the grades: the 80%, 60%, 40% and 20% quantiles of score.
y <- quantile(score, c(.8, .6, .4, .2))
y

# y[1] is the 80% quantile (top 20% of students lie at or above it), y[2] the
# 60% quantile, and so on. Build a new column `grade` in roster by logical
# indexing on these cut points; the five conditions are mutually exclusive and
# cover every score.
roster$grade[score >= y[1]] <- "A"
roster$grade[score < y[1] & score >= y[2]] <- "B"
roster$grade[score < y[2] & score >= y[3]] <- "C"
roster$grade[score < y[3] & score >= y[4]] <- "D"
roster$grade[score < y[4]] <- "F"
roster

# Split each student name on the space into first-name and last-name parts.
name <- strsplit(roster$Student, " ")
name

# "[" is the extraction function: sapply(name, "[", k) pulls the k-th element
# out of every component of the list, giving a character vector.
lastname <- sapply(name, "[", 2)
lastname
firstname <- sapply(name, "[", 1)

# roster[, -1] keeps all rows and drops the first column (the full name),
# which is replaced by the two new name columns.
roster <- cbind(firstname, lastname, roster[, -1])
roster

# Sort the rows by last name first, then by first name.
roster <- roster[order(lastname, firstname), ]
roster

# The console transcript of this session follows. It opens with the same
# first step as above: setting digits to 2 for easier-to-read output.

> options(digits = 2)

> Student<-c("John Davis","Angela williams","Bullwinkle Moose","David Homes",

+ "Janice Markhammer","Chery Cushing",

+ "Reuven Ytzrhak","Greg xnox","Hoel England",

+ "Mary Rayburn")

> Math<-c(502,600,412,358,495,512,410,625,573,522)

> Science<-c(95,99,80,82,75,85,82,95,89,86)

> English<-c(25,22,18,15,20,18,15,30,27,18)

> #stringsAsFactors = FALSE说明这个字符串不能做因子

> roster<-data.frame(Student,Math,Science,English,stringsAsFactors = FALSE)

> roster

             Student Math Science English

1         John Davis  502      95      25

2    Angela williams  600      99      22

3   Bullwinkle Moose  412      80      18

4        David Homes  358      82      15

5  Janice Markhammer  495      75      20

6      Chery Cushing  512      85      18

7     Reuven Ytzrhak  410      82      15

8          Greg xnox  625      95      30

9       Hoel England  573      89      27

10      Mary Rayburn  522      86      18

> #做标准化:scale()可以将变量进行标准化

> z<-scale(roster[,2:4])

> z

        Math Science English

 [1,]  0.013    1.08    0.82

 [2,]  1.143    1.60    0.24

 [3,] -1.026   -0.89   -0.55

 [4,] -1.649   -0.63   -1.14

 [5,] -0.068   -1.55   -0.16

 [6,]  0.128   -0.24   -0.55

 [7,] -1.049   -0.63   -1.14

 [8,]  1.432    1.08    1.81

 [9,]  0.832    0.29    1.22

[10,]  0.243   -0.10   -0.55

attr(,"scaled:center")

   Math Science English

    501      87      21

attr(,"scaled:scale")

   Math Science English

   86.7     7.6     5.1

> #得到综合的平均分

> score<-apply(z,1,mean)

> score

 [1]  0.64  0.99 -0.82 -1.14 -0.59 -0.22 -0.94  1.44  0.78 -0.14

> #rbind将记录进行合并;cbind将列进行合并

> roster<-cbind(roster,score)

> roster

             Student Math Science English score

1         John Davis  502      95      25  0.64

2    Angela williams  600      99      22  0.99

3   Bullwinkle Moose  412      80      18 -0.82

4        David Homes  358      82      15 -1.14

5  Janice Markhammer  495      75      20 -0.59

6      Chery Cushing  512      85      18 -0.22

7     Reuven Ytzrhak  410      82      15 -0.94

8          Greg xnox  625      95      30  1.44

9       Hoel England  573      89      27  0.78

10      Mary Rayburn  522      86      18 -0.14

> #给出学生成绩百分数的划分

> y<-quantile(score,c(.8,.6,.4,.2))

> y

  80%   60%   40%   20%

 0.82  0.17 -0.37 -0.85

> #0.82是score的80%分位数(前20%学生的分界点),0.17是60%分位数

> #按照这个级别设置逻辑运算符

> #在roster数据框中加入一个新的列grade

> roster$grade[score>=y[1]]<-"A"

> roster$grade[score<y[1] & score>=y[2]]<-"B"

> roster$grade[score<y[2] & score>=y[3]]<-"C"

> roster$grade[score<y[3] & score>=y[4]]<-"D"

> roster$grade[score<y[4]]<-"F"

> roster

             Student Math Science English score grade

1         John Davis  502      95      25  0.64     B

2    Angela williams  600      99      22  0.99     A

3   Bullwinkle Moose  412      80      18 -0.82     D

4        David Homes  358      82      15 -1.14     F

5  Janice Markhammer  495      75      20 -0.59     D

6      Chery Cushing  512      85      18 -0.22     C

7     Reuven Ytzrhak  410      82      15 -0.94     F

8          Greg xnox  625      95      30  1.44     A

9       Hoel England  573      89      27  0.78     B

10      Mary Rayburn  522      86      18 -0.14     C

> #把学生姓名做拆分:按firstname和lastname进行拆分

> name<-strsplit((roster$Student)," ")

> name

[[1]]

[1] "John"  "Davis"

 

[[2]]

[1] "Angela"   "williams"

 

[[3]]

[1] "Bullwinkle" "Moose"     

 

[[4]]

[1] "David" "Homes"

 

[[5]]

[1] "Janice"     "Markhammer"

 

[[6]]

[1] "Chery"   "Cushing"

 

[[7]]

[1] "Reuven"  "Ytzrhak"

 

[[8]]

[1] "Greg" "xnox"

 

[[9]]

[1] "Hoel"    "England"

 

[[10]]

[1] "Mary"    "Rayburn"

> #重新置变量名

> #sapply()函数;这个函数提取列表中每个成分的第一个元素,

> #放入一个存储名字的向量中;然后提取每个成分的第二个元素,

> #放入姓氏的向量中;

> #“[”表示提取某个对象一部分的函数,在这里它是提取name中的第一个和第二个元素的

> lastname<-sapply(name,"[",2)

> lastname

 [1] "Davis"      "williams"   "Moose"      "Homes"      "Markhammer"

 [6] "Cushing"    "Ytzrhak"    "xnox"       "England"    "Rayburn"   

> firstname<-sapply(name,"[",1)

> #[,-1]:表示行不变,第一列(姓名)删除

> roster<-cbind(firstname,lastname,roster[,-1])

> roster

    firstname   lastname Math Science English score grade

1        John      Davis  502      95      25  0.64     B

2      Angela   williams  600      99      22  0.99     A

3  Bullwinkle      Moose  412      80      18 -0.82     D

4       David      Homes  358      82      15 -1.14     F

5      Janice Markhammer  495      75      20 -0.59     D

6       Chery    Cushing  512      85      18 -0.22     C

7      Reuven    Ytzrhak  410      82      15 -0.94     F

8        Greg       xnox  625      95      30  1.44     A

9        Hoel    England  573      89      27  0.78     B

10       Mary    Rayburn  522      86      18 -0.14     C

> #进行排序:第一排序为姓,第二排序为名。且是按行来做的

> roster<-roster[order(lastname,firstname),]

> roster

    firstname   lastname Math Science English score grade

6       Chery    Cushing  512      85      18 -0.22     C

1        John      Davis  502      95      25  0.64     B

9        Hoel    England  573      89      27  0.78     B

4       David      Homes  358      82      15 -1.14     F

5      Janice Markhammer  495      75      20 -0.59     D

3  Bullwinkle      Moose  412      80      18 -0.82     D

10       Mary    Rayburn  522      86      18 -0.14     C

2      Angela   williams  600      99      22  0.99     A

8        Greg       xnox  625      95      30  1.44     A

7      Reuven    Ytzrhak  410      82      15 -0.94     F

 

你可能感兴趣的:(R)