# Randomly split a data set into a training part and a test part.
#
# Args:
#   data: a data frame (or other two-dimensional object that supports
#         `data[rows, ]` indexing).
#   p:    proportion of rows assigned to the training set, in [0, 1].
#         The training set receives floor(nrow * p) rows.
#   s:    seed passed to set.seed(); note this resets the global RNG state.
#
# Returns:
#   list(train = ..., test = ...). Every row of `data` lands in exactly one
#   of the two parts.
split.data <- function(data, p = 0.7, s = 666) {
  stopifnot(is.numeric(p), length(p) == 1L, !is.na(p), p >= 0, p <= 1)
  set.seed(s)
  n <- dim(data)[1]
  # Same permutation as sample(1:n), but also safe when n == 0.
  index <- sample.int(n)
  n_train <- floor(n * p)
  # seq_len() avoids the 1:0 and (n + 1):n count-down traps, and using the
  # same cut point for both parts means no row is dropped when n * p is
  # fractional.
  train <- data[index[seq_len(n_train)], ]
  test <- data[index[n_train + seq_len(n - n_train)], ]
  list(train = train, test = test)
}
# Benchmark: time data.table::fread() against base read.csv() on the same
# CSV file. Cleaned up from a pasted console session so that it parses as R;
# the console output recorded at the time is kept as comments.
library(data.table)
# data.table 1.11.8 Latest news: r-datatable.com

# Read with fread()
timestart <- Sys.time()
a <- fread("D:\\AA00004.csv", header = TRUE)
timeend <- Sys.time()
runningtime <- timeend - timestart
print(runningtime)
# Time difference of 0.03100109 secs

# Read the same file with read.csv()
timestart <- Sys.time()
b <- read.csv("D:\\AA00004.csv", header = TRUE)
timeend <- Sys.time()
runningtime2 <- timeend - timestart
print(runningtime2)
# Time difference of 0.5470309 secs
# Batch read: load every CSV file in a given folder
# Draw the map route by route: one PDF per route, each showing that route's
# points on a Baidu map.
# NOTE(review): the Baidu API keys below are hard-coded in the source;
# consider loading them from an environment variable instead.
options(baidumap.key = 'XXXXXXX')
library(baidumap)
library(ggmap)
dir.create('carAA2_PDF') # create the output folder
setwd('carAA2_PDF') # write the plots into that folder (changes the working directory for the rest of the session)
# The key does not depend on the route, so set it once instead of on every
# loop iteration.
options(baidumap.key = 'ZRWY1SKU7y6HilsmjH4pvwgY3hlKi0FE')
# `t` and `subcarAA2` are expected to be defined before this point.
# seq_len() with integer division avoids the 1:0 count-down when there are
# no routes.
for (i in seq_len(length(t) %/% 2)) {
  # paste0() so the file name has no embedded spaces ("route_1.pdf").
  pdfname <- paste0('route_', i, '.pdf')
  pdf(file = pdfname)
  # Close the PDF device even if fetching or plotting the map fails.
  tryCatch({
    route <- subset(x = subcarAA2, subcarAA2$rootID == i)
    loc <- route[, c('lng', 'lat')]
    loc <- unique(loc)
    lon_range <- extendrange(loc$lng) # longitude range of the route
    lat_range <- extendrange(loc$lat) # latitude range of the route
    # Centre of the route, used as the map centre.
    center <- c(mean(lon_range), mean(lat_range))
    # Zoom level derived from the coordinate ranges, one step further out.
    Zoom <- calc_zoom(lon_range, lat_range) - 1
    map <- getBaiduMap(center, width = 640, height = 640, zoom = Zoom, scale = 2)
    # Scatter plot of the route points: x = longitude, y = latitude.
    # NOTE(review): the original comment said opacity 0.8, but the code maps
    # alpha = 1 inside aes(); left unchanged -- confirm the intended value.
    print(ggmap(map) +
      geom_point(data = loc,
                 aes(lng, lat, alpha = 1), col = "darkblue", size = 1))
  }, finally = dev.off())
}
# Batch-read every CSV file in a folder into a list of data frames, named by
# file name.
myfolder <- choose.dir() # interactive folder picker; its result is not used below
path <- "D:\\用户目录\\10辆车"
# Keep only *.csv entries so stray files and sub-folders are not handed to
# read.csv().
fileNames <- dir(path, pattern = "\\.csv$", ignore.case = TRUE)
# Full path of each file, named by file name so the list below keeps the
# same names.
filePath <- file.path(path, fileNames)
names(filePath) <- fileNames
# Read the data; the result is a list with one data frame per file.
data_10 <- lapply(filePath, function(x) {
  read.csv(x, header = TRUE)
})
# Inspect the first table, if any file was found.
if (length(data_10) > 0) {
  View(data_10[[1]])
}
# Load the .rda files found in the target directory and collect the loaded
# objects in a list.
path <- "G:\\rda"
subcarNames <- dir(path) # file names under that path
subdata_10 <- vector("list", length(subcarNames)) # one slot per file
for (i in seq_along(subcarNames)) {
  file <- file.path(path, subcarNames[i]) # full path of the file
  # load() restores the objects into the current environment and returns
  # their names.
  load_data <- load(file)
  if (length(load_data) > 0) {
    # Store the last object restored from the file. get() looks the object
    # up by name directly instead of parsing and evaluating text.
    subdata_10[i] <- list(get(load_data[length(load_data)]))
  }
}
# save() needs the object and an explicit `file =`; passing only the file
# name as the first argument fails with "'file' must be specified".
save(subcarNames, file = 'subcarNames.rda')
save(subdata_10, file = 'subdata_10.rda')
# Report the missing-value rate of every column.
#
# Args:
#   data: a data frame (or matrix-like object accepted by is.na()).
#
# Returns (invisibly, after printing it):
#   a character matrix with one row per column of `data`; the first column
#   holds the column names and the column "lost" holds the share of NA values.
#
# NOTE: this function is named `missing`, which masks base::missing() in the
# environment where it is defined.
missing <- function(data) {
  # Vectorised NA share per column; also works for zero-column input and for
  # data.table objects, where data[, i] indexing does not select column i.
  lost <- unname(colSums(is.na(data)) / nrow(data))
  loss <- cbind(names(data), lost = lost)
  print(loss)
  invisible(loss)
}
# Missing-value handling
# sum(complete.cases(data)) # number of fully complete rows
library(DMwR) # missing-value helpers; manyNAs() is used below
# Rows in which more than 30% of the fields are missing (threshold 0.3).
lndex <- manyNAs(data, 0.3)
length(lndex) # how many such rows there are
# Only drop rows when some were found: data[-integer(0), ] would select
# nothing and silently remove every row.
if (length(lndex) > 0) {
  data <- data[-lndex, ]
}