代码收藏

文章目录

        • 1.将样本按照一定比例分为训练样本和测试样本
        • 2. 比较代码运行时间 (fread读取数据很快,提前library(data.table))
        • 3.循环存入pdf,到某个文件夹
        • 4.循环读取指定路径下的csv
        • 5.循环读指定文件夹下的rda
        • 6. 编写查看字段缺失值函数
        • 7.查看案例缺失度manyNAs函数(DMwR)

1.将样本按照一定比例分为训练样本和测试样本

# Randomly split a data set into a training part and a test part.
#
# Every row ends up in exactly one of the two parts: the first
# floor(n * p) rows of a random permutation go to `train`, all remaining
# rows go to `test`.
#
# Args:
#   data: a data frame or matrix (anything supporting dim() and row indexing).
#   p:    fraction of rows assigned to the training part, in [0, 1].
#   s:    seed handed to set.seed() so the split is reproducible.
#         Note that this resets the session's global RNG state.
#
# Returns:
#   A list with elements `train` and `test`.
#
# NOTE: the name looks like an S3 method of split() for class "data"; it is
# kept unchanged so existing callers keep working.
split.data <- function(data, p = 0.7, s = 666) {
  stopifnot(is.numeric(p), length(p) == 1, p >= 0, p <= 1)
  set.seed(s)
  n <- dim(data)[1]
  index <- sample(seq_len(n))
  n_train <- floor(n * p)
  train <- data[index[seq_len(n_train)], ]
  # Start the test part right after the last training row, so that no row is
  # skipped when n * p is fractional and nothing is selected when p == 1.
  test <- data[index[n_train + seq_len(n - n_train)], ]
  list(train = train, test = test)
}

2. 比较代码运行时间 (fread读取数据很快,提前library(data.table))

> library(data.table)
data.table 1.11.8  Latest news: r-datatable.com

> timestart<-Sys.time()
> a <- fread("D:\\AA00004.csv",header = TRUE)
> timeend<-Sys.time()
> runningtime<-timeend-timestart
> print(runningtime)
Time difference of 0.03100109 secs

> timestart <- Sys.time()
> b <- read.csv("D:\\AA00004.csv",header =T)
> timeend <- Sys.time()
> runningtime2 <- timeend-timestart
> print(runningtime2)
Time difference of 0.5470309 secs

3.循环存入pdf,到某个文件夹

批量读取,指定文件夹下的所有csv

# Draw every route segment on a Baidu map and save one PDF per segment.
#
# The Baidu API key is read once from the BAIDUMAP_KEY environment variable
# instead of being written into the source; 'XXXXXXX' is only a placeholder
# used when the variable is not set.
options(baidumap.key = Sys.getenv("BAIDUMAP_KEY", unset = "XXXXXXX"))
library(baidumap)
library(ggmap)

# Output folder for the PDFs. Files are addressed through file.path() so the
# session's working directory is left untouched.
out_dir <- "carAA2_PDF"
dir.create(out_dir, showWarnings = FALSE)

# NOTE(review): `t` and `subcarAA2` are not defined in this snippet; they are
# expected to exist in the session. Half the length of `t` is used as the
# number of routes -- confirm against the code that builds `t`.
for (i in seq_len(length(t) %/% 2)) {
  # paste0() keeps the file name free of embedded spaces ("route_1.pdf")
  pdfname <- file.path(out_dir, paste0("route_", i, ".pdf"))
  pdf(file = pdfname)
  # finally = dev.off() closes the PDF device even if the map download or
  # the plotting fails, so a failed route does not leak an open device.
  tryCatch({
    route <- subset(x = subcarAA2, subcarAA2$rootID == i)
    loc <- unique(route[, c("lng", "lat")])
    lon_range <- extendrange(loc$lng)  # longitude range of the route
    lat_range <- extendrange(loc$lat)  # latitude range of the route
    # Centre of the route, used as the centre of the map
    center <- c(mean(lon_range), mean(lat_range))
    # Zoom level derived from the coordinate ranges, one step further out
    Zoom <- calc_zoom(lon_range, lat_range) - 1
    map <- getBaiduMap(center, width = 640, height = 640, zoom = Zoom,
                       scale = 2)
    # Scatter plot of the route points (x = longitude, y = latitude).
    # alpha is mapped inside aes() to the constant 1, as in the original.
    print(ggmap(map) +
            geom_point(data = loc,
                       aes(lng, lat, alpha = 1), col = "darkblue", size = 1))
  }, finally = dev.off())
}

4.循环读取指定路径下的csv

# Read every CSV file found in one folder into a named list of data frames.
#
# To pick the folder interactively instead (Windows only), use:
#   path <- choose.dir()
path <- "D:\\用户目录\\10辆车"
# Only *.csv files are listed, so sub-directories and other file types in
# the folder are not handed to read.csv().
fileNames <- list.files(path, pattern = "\\.csv$", ignore.case = TRUE)
# Full path of every file, named by the bare file name so that the
# resulting list can be indexed by file name.
filePath <- file.path(path, fileNames)
names(filePath) <- fileNames
# One data frame per file; the result is a list
data_10 <- lapply(filePath, function(x) read.csv(x, header = TRUE))
# Inspect the first table, but only in an interactive session and only when
# at least one file was actually read.
if (interactive() && length(data_10) > 0) {
  View(data_10[[1]])
}

5.循环读指定文件夹下的rda

# Load every .rda / .RData file of one folder and collect the stored
# objects in a list, then save the file names and the collected data.
path <- "G:\\rda"
# Names of the R data files in that folder
subcarNames <- list.files(path, pattern = "\\.(rda|rdata)$",
                          ignore.case = TRUE)
subdata_10 <- list()  # one element per file

for (i in seq_along(subcarNames)) {
  file <- file.path(path, subcarNames[i])  # full path of the file
  # Load into a private environment so that objects stored in the file can
  # never overwrite `i`, `path`, `subdata_10` or anything else in the
  # calling environment.
  load_env <- new.env()
  load_data <- load(file, envir = load_env)  # names of the loaded objects
  # Keep the last object of the file (a file normally holds exactly one);
  # a file without objects leaves this slot unset.
  if (length(load_data) > 0) {
    subdata_10[[i]] <- get(load_data[length(load_data)], envir = load_env)
  }
}

# save() needs the object itself plus an explicit `file =` argument
save(subcarNames, file = "subcarNames.rda")
save(subdata_10, file = "subdata_10.rda")

6. 编写查看字段缺失值函数

# Report the share of missing values (NA) in every column of a data set.
#
# NOTE: this definition masks base::missing() for code that looks the name
# up from the environment where it is defined; the name is kept unchanged
# for compatibility with existing callers.
#
# Args:
#   data: a data frame; columns are read as data[, i] and labelled with
#         names(data).
#
# Returns:
#   The printed summary, invisibly (the value of print()): a character
#   matrix whose first column holds the column names and whose second
#   column, "lost", holds the fraction of NA values of that column.
missing <- function(data) {
  n_rows <- dim(data)[1]
  n_cols <- dim(data)[2]
  # seq_len() keeps this safe for a data set without columns, where
  # 1:n_cols would iterate over c(1, 0) and fail on data[, 1].
  lost <- vapply(
    seq_len(n_cols),
    function(j) sum(is.na(data[, j])) / n_rows,
    numeric(1)
  )
  loss <- cbind(names(data), lost)
  print(loss)
}

7.查看案例缺失度manyNAs函数(DMwR)

# Missing-value handling: drop the rows (cases) that have too many NAs.
# NOTE(review): `data` is not defined in this snippet; it is expected to be
# the data frame under analysis.
# sum(complete.cases(data))  # number of fully complete rows
library(DMwR)  # provides manyNAs()
# Rows flagged by manyNAs() with threshold 0.3, i.e. rows in which more than
# 30% of the columns are NA
lndex <- manyNAs(data, 0.3)
length(lndex)  # how many rows are affected
# Only subset when something was found: with an empty index,
# data[-lndex, ] would select zero rows and throw away the whole data set.
if (length(lndex) > 0) {
  data <- data[-lndex, ]
}

你可能感兴趣的:(R语言,备忘)