使用 tabplot 可视化大型数据集

我是被这张图吸引才 mark 的,先记录下来。
image.png

# Attach the packages with library(): it stops immediately when a package is
# missing, whereas require() only warns and returns FALSE, letting the script
# fail later at data(diamonds) with a less helpful message.
library(ggplot2)
# devtools::install_github("mtennekes/tabplot")
library(tabplot)

# Load the diamonds example data (provided by ggplot2).
data(diamonds)

## add some NA's
# Blank out the price of every "Ideal"-cut diamond. This must run before the
# next line, because it relies on the still-complete cut column.
is.na(diamonds$price) <- diamonds$cut == "Ideal"
# Blank out cut wherever a uniform random draw exceeds 0.8 (about 20% of the
# rows). The draw is not seeded, so the affected rows differ between runs.
is.na(diamonds$cut) <- runif(nrow(diamonds)) > 0.8

# Table plot of the whole data set with default settings.
tableplot(diamonds)

# The table plot below consists of five columns, with the data sorted by
# price. The missing values added earlier end up at the bottom and are (by
# default) drawn in bright red.
tableplot(
  diamonds,
  select = c(carat, price, cut, color, clarity),
  sortCol = price
)


# Same columns and sort order, zoomed in with `from`/`to`
# (percentages of the sorted rows, per the tabplot documentation).
tableplot(
  diamonds,
  select = c(carat, price, cut, color, clarity),
  sortCol = price,
  from = 0,
  to = 5
)


# Plot only the rows that are "Premium" cut and priced below 5000.
tableplot(
  diamonds,
  subset = price < 5000 & cut == "Premium"
)

# Display the palettes that tabplot provides.
tablePalettes()

# Override the palettes of the categorical columns: named tabplot palettes for
# cut and color, and an explicit colour vector from rainbow(8) for clarity.
tableplot(
  diamonds,
  pals = list(
    cut = "Set1(6)",
    color = "Set5",
    clarity = rainbow(8)
  )
)


# Derive factor versions of the numeric columns carat and price with
# num2fac(); `n` is presumably the requested number of classes (confirm
# against the tabplot documentation).
diamonds[["carat_class"]] <- num2fac(diamonds[["carat"]], n = 20)
diamonds[["price_class"]] <- num2fac(diamonds[["price"]], n = 100)

# Show each numeric column next to its binned counterpart.
tableplot(
  diamonds,
  select = c(carat, price, carat_class, price_class)
)



# Create a large dataset by repeating every row index of diamonds ten times.
large_diamonds <- diamonds[rep(seq.int(nrow(diamonds)), times = 10), ]

# Time the preparation step; `p` is assigned in the calling environment and is
# reused by the later tableplot() calls.
system.time(p <- tablePrepare(large_diamonds))
#  user system elapsed
#  0.78   0.27    1.06

# Time building a table plot object from the prepared data, without drawing.
system.time(tableplot(p, plot = FALSE))
#  user system elapsed
#  0.14   0.09    0.23

# Same, but sorted by price and with 200 row bins.
system.time(tableplot(p, sortCol = price, nBins = 200, plot = FALSE))
#  user system elapsed
#  0.11   0.12    0.24

# Although the first step takes a few seconds on a moderate desktop computer,
# creating table plots from the intermediate result (object p) takes very
# little processing time compared with the direct approach.

# Time a drawn table plot with sample = TRUE.
system.time(tableplot(p, sample = TRUE))
#  user system elapsed
#  0.31   0.25    0.56

# Sampling weight per row: carat divided by the largest carat in the data.
# Inside with(diamonds, ...) the column is already in scope, so it is
# referenced directly rather than through diamonds$carat.
carat.norm <- with(diamonds, carat / max(carat))

# draw samples
# Two samples of 10000 rows each, drawn with replacement:
#   exp.diamonds - selection probability proportional to carat.norm
#   chp.diamonds - selection probability proportional to 1 - carat.norm
# (the names presumably stand for "expensive" and "cheap" diamonds).
# seq_len() replaces 1:nrow(), which would yield c(1, 0) for an empty table.
exp.diamonds <- diamonds[
  sample(seq_len(nrow(diamonds)), size = 10000, prob = carat.norm,
         replace = TRUE),
]
chp.diamonds <- diamonds[
  sample(seq_len(nrow(diamonds)), size = 10000, prob = 1 - carat.norm,
         replace = TRUE),
]

# Build a table plot object for each sample without drawing it.
tp1 <- tableplot(exp.diamonds, plot = FALSE)
tp2 <- tableplot(chp.diamonds, plot = FALSE)

# Plot the difference between the two table plot objects.
plot(tp2 - tp1)

# Keep the table plot object instead of drawing it, then print its summary.
tab <- tableplot(diamonds, plot = FALSE)
summary(tab)

# Layout options: first seven columns, larger fonts, eight legend lines and a
# custom title.
tableplot(
  diamonds,
  select = 1:7,
  fontsize = 14,
  legend.lines = 8,
  title = "Shine on you crazy Diamond",
  fontsize.title = 18
)

# Derive a modified table plot object from `tab`: a different column
# selection and another palette for cut.
tab2 <- tableChange(
  tab,
  select_string = c("carat", "price", "cut", "color", "clarity"),
  pals = list(cut = "Set1(2)")
)
plot(tab2)

# Write the original table plot object `tab` (not `tab2`) to diamonds.png.
tableSave(
  tab,
  filename = "diamonds.png",
  width = 5,
  height = 3,
  fontsize = 6,
  legend.lines = 6
)
  • R package for tableplot visualisation
  • Visualization of large datasets with tabplot

希望以后的数据分析能用到这个包

你可能感兴趣的:(使用Tabplot可视化大型数据集)