【R语言 数据处理】用R做数据处理的小技巧

一、主要内容
创建新变量
修改数据
修改变量名
处理缺失值
数据排序
数据合并
数据筛选
抽样

二、R语言代码

# ---- Build the example data set ----
# The script deliberately does NOT call rm(list = ls()) / gc(): wiping the
# caller's workspace is a destructive side effect. Run the script in a fresh
# R session if a clean workspace is wanted.
manager <- c(1, 2, 3, 4, 5)
date <- c("10/24/08", "10/28/08", "10/1/08", "10/12/08", "5/1/09")
country <- c("US", "US", "UK", "UK", "UK")
gender <- c("M", "F", "F", "M", "F")
age <- c(32, 45, 25, 39, 99)
q1 <- c(5, 3, 3, 3, 2)
q2 <- c(4, 5, 5, 3, 2)
q3 <- c(5, 2, 5, 4, 1)
q4 <- c(5, 5, 5, NA, 2)
q5 <- c(5, 5, 2, NA, 1)

# Five rows, ten columns; q4 and q5 each contain one NA.
# stringsAsFactors is spelled out (with FALSE, not the reassignable F) so that
# date/country/gender stay character vectors on R versions before 4.0 too.
leadership <- data.frame(
  manager, date, country, gender, age, q1, q2, q3, q4, q5,
  stringsAsFactors = FALSE
)


# ---- Create a new variable ----
# Plain arithmetic mean of q1..q5; a row with any NA score gets an NA mean.
leadership$meanx <- with(leadership, (q1 + q2 + q3 + q4 + q5) / 5)

# ---- Modify data ----
# 99 is recoded to NA in the age column.
leadership$age <- replace(leadership$age, leadership$age == 99, NA)

# Derive an age category; rows whose age is NA keep the initial NA.
leadership$agecat2 <- NA
leadership$agecat2[leadership$age > 75] <- "Elder"
leadership$agecat2[leadership$age >= 55 & leadership$age <= 75] <- "Middle Aged"
leadership$agecat2[leadership$age < 55] <- "Young"

# ---- Rename variables ----
library(plyr)
# rename() takes a named character vector of the form c(old_name = "new_name").
leadership <- plyr::rename(
  leadership,
  replace = c(manager = "managerID", date = "testDate")
)



# ---- Handle missing values ----
# Recompute meanx as the per-row mean of q1..q5 with NA scores dropped.
# The score columns are picked by name instead of by position (6:10) so the
# result does not depend on the column order of the data frame, and TRUE is
# written out because T can be reassigned.
# The outer parentheses print the updated data frame.
(leadership <- transform(
  leadership,
  meanx = rowMeans(leadership[c("q1", "q2", "q3", "q4", "q5")], na.rm = TRUE)
))
# Print numbers with 3 significant digits from this point on.
options(digits = 3)



# ---- Sort rows ----
# The sort keys are read from the data frame itself via with(). The
# free-standing `age` vector created at the top of the script still holds 99
# for the last manager, whereas leadership$age has been recoded to NA, so
# ordering by the global vectors can rank rows by values that are no longer
# in the table (most visibly for the descending sort).
# order() places NA keys last by default.

# Ascending by age.
leadership[with(leadership, order(age)), ]

# By gender, then ascending age within gender.
leadership[with(leadership, order(gender, age)), ]

# By gender, then descending age within gender.
leadership[with(leadership, order(gender, -age)), ]


# ---- Merge data sets ----
# Second table: a q6 value for each manager id 1..5.
manager <- as.numeric(1:5)
q6 <- c(4, 3, 4, 2, 1)
leadership2 <- data.frame(manager = manager, q6 = q6)
leadership2

# Join the two tables on the manager id, which is named differently in each.
merge(
  x = leadership,
  y = leadership2,
  by.x = "managerID",
  by.y = "manager"
)


# ---- Select columns and rows ----
# Columns by position.
leadership[, 6:10]

# Columns by name.
leadership[c("q1", "q2", "q3", "q4", "q5")]

# Column names built programmatically: "q1" ... "q5".
myvars <- paste0("q", 1:5)
(newdata <- leadership[myvars])

# Rows by condition. The condition is evaluated on the data frame's own
# columns (not on the free-standing gender/age vectors, whose age values are
# out of date after the 99 -> NA recode). which() drops NA results so a
# missing age can never produce an all-NA row in the output.
leadership[which(leadership$gender == "M" & leadership$age > 30), ]

# subset() evaluates the condition inside the data frame and drops NA
# conditions itself; select keeps the columns from gender through q4.
subset(leadership, age >= 35 | age < 24, select = gender:q4)


# ---- Random sampling ----
# Draw 3 distinct rows at random. seq_len() is used instead of 1:nrow(), which
# would yield c(1, 0) for an empty data frame, and FALSE is written out
# because F can be reassigned.
leadership[sample(seq_len(nrow(leadership)), 3, replace = FALSE), ]


你可能感兴趣的:(数据科学--R语言)