一、主要内容
创建新变量
修改数据
修改变量名
处理缺失值
数据排序
数据合并
数据筛选
抽样
二、R语言代码
# ---- Example data ----
# Builds the five-row `leadership` data frame used by every section below:
# a manager number, a date string, country, gender, age and five item
# scores q1..q5 (q4 and q5 are NA for manager 4).
#
# NOTE: the script used to start with `rm(list = ls())` and `gc()`. Both
# were removed. Wiping the global environment destroys unrelated objects
# of whoever sources this file, and it still does not give a clean session
# (attached packages and options survive). Restart R for a fresh state.
#
# The column vectors are deliberately left in the global environment
# because later statements in this script refer to them by name.
manager <- c(1, 2, 3, 4, 5)
date <- c("10/24/08", "10/28/08", "10/1/08", "10/12/08", "5/1/09")
country <- c("US", "US", "UK", "UK", "UK")
gender <- c("M", "F", "F", "M", "F")
age <- c(32, 45, 25, 39, 99)
q1 <- c(5, 3, 3, 3, 2)
q2 <- c(4, 5, 5, 3, 2)
q3 <- c(5, 2, 5, 4, 1)
q4 <- c(5, 5, 5, NA, 2)
q5 <- c(5, 5, 2, NA, 1)
# `FALSE` is spelled out: `F` is an ordinary variable that can be
# reassigned, whereas `FALSE` is a reserved word.
leadership <- data.frame(
  manager, date, country, gender, age, q1, q2, q3, q4, q5,
  stringsAsFactors = FALSE
)
# ---- Create a new variable ----
# meanx is the plain arithmetic mean of q1..q5. Because `+` propagates NA,
# any row with a missing item gets NA here.
leadership$meanx <- with(leadership, (q1 + q2 + q3 + q4 + q5) / 5)
# ---- Modify data ----
# An age of 99 is treated as missing: replace it with NA.
is.na(leadership$age) <- which(leadership$age == 99)

# Derive a character age band from age:
#   > 75 -> "Elder", 55..75 -> "Middle Aged", < 55 -> "Young".
# Rows whose age is NA keep an NA band. The work happens inside local() so
# the temporaries do not end up in the global environment.
leadership$agecat2 <- local({
  ages <- leadership$age
  band <- rep(NA_character_, length(ages))
  band[which(ages > 75)] <- "Elder"
  band[which(ages >= 55 & ages <= 75)] <- "Middle Aged"
  band[which(ages < 55)] <- "Young"
  band
})
# ---- Rename variables ----
library(plyr)
# Rename manager -> managerID and date -> testDate. In the replacement
# vector the element names are the old column names and the values are the
# new ones.
leadership <- plyr::rename(
  x = leadership,
  replace = c(manager = "managerID", date = "testDate")
)
# ---- Handle missing values ----
# Recompute meanx as the row mean of q1..q5, this time skipping NA items
# (na.rm = TRUE) so a row with some missing answers still gets a mean of
# the answers that are present.
# The item columns are selected by name, not by position 6:10, so the
# result does not depend on the current column order of the data frame.
leadership$meanx <- rowMeans(leadership[paste0("q", 1:5)], na.rm = TRUE)
leadership
# Use 3 significant digits for numbers printed from here on.
options(digits = 3)
# ---- Sort data ----
# The sort keys are evaluated inside the data frame via with(). A bare
# `order(age)` would read the global vectors `age` / `gender` instead,
# which are stale copies: the global `age` still holds 99 for row 5 while
# the column holds NA, so a descending sort on it puts that row in the
# wrong position. order() places NA keys last by default.

# Ascending by age.
leadership[with(leadership, order(age)), ]
# By gender, then ascending age within gender.
leadership[with(leadership, order(gender, age)), ]
# By gender, then descending age within gender.
leadership[with(leadership, order(gender, -age)), ]
# ---- Merge data sets ----
# A second table with one extra item score (q6) per manager number.
manager <- c(1, 2, 3, 4, 5)
q6 <- c(4, 3, 4, 2, 1)
leadership2 <- data.frame(manager = manager, q6 = q6)
leadership2
# Join the two tables on the manager number, which is called managerID in
# `leadership` and manager in `leadership2`.
merge(
  x = leadership,
  y = leadership2,
  by.x = "managerID",
  by.y = "manager"
)
# ---- Select columns and rows ----
# Column selection three ways: by position, by literal names, and by
# names generated with paste0().
leadership[, 6:10]
leadership[c("q1", "q2", "q3", "q4", "q5")]
myvars <- paste0("q", 1:5)
(newdata <- leadership[myvars])

# Row selection: male managers older than 30.
# The condition is built from the data-frame columns; bare `gender` / `age`
# would silently read the global vectors of the same name, which no longer
# match the data frame (age 99 was recoded to NA in the column only).
# which() drops rows where the condition is NA, so a missing age cannot
# produce an all-NA row in the result.
leadership[which(leadership$gender == "M" & leadership$age > 30), ]

# subset() evaluates its condition inside the data frame; keep the columns
# from gender through q4 for rows with age >= 35 or age < 24.
subset(leadership, age >= 35 | age < 24, select = gender:q4)
# ---- Random sampling ----
# Draw 3 distinct rows at random (sampling without replacement). No seed
# is set here, so the selected rows differ from run to run.
# seq_len() replaces 1:nrow(), which would yield c(1, 0) for an empty data
# frame; FALSE is spelled out because `F` can be reassigned.
leadership[sample(seq_len(nrow(leadership)), 3, replace = FALSE), ]