R语言应用-1:数据整理

基础篇----R环境安装及基本语句:

(65条消息) windows 下载 R 和 R studio,以及R语言基本使用_up_xin的博客-CSDN博客https://blog.csdn.net/weixin_41427508/article/details/128148008?spm=1001.2014.3001.5502

目录

一. 数据整理 -- tidyverse包

1. 数据导入导出及读取

2. 类似数据库操作:mutate 新建变量、filter/select 过滤行/列、summarise+group_by 分组汇总、4种 join 数据合并 (管道操作符:前一步的结果可作为后一步的参数)

3. separate/unite 列的分裂/合并,pivot_longer/wider 长宽数据转换

二. 可视化作图

三. 统计应用


一. 数据整理 -- tidyverse包

1. 数据导入导出及读取

# 数据操作
library(tidyverse)
# https://www.tidyverse.org/

# CSV import: pick the file in the interactive dialog and read it as
# comma-separated text with a header row.
# TRUE is spelled out because `T` is an ordinary variable that can be reassigned.
rawdata <- read.table(file.choose(), header = TRUE, sep = ",")
head(rawdata, n = 4)   # first 4 rows
tail(rawdata, n = 10)  # last 10 rows
rawdata[95:105, ]      # rows 95 to 105, all columns
str(rawdata)           # structure: column types and a preview of the values
# Alternatives:
# read.csv(file.choose())
# data.table::fread(file.choose())

# CSV export: write rawdata to "test.csv" in the working directory,
# comma-separated and without the row-name column.
# FALSE is spelled out because `F` is an ordinary variable that can be reassigned.
write.table(rawdata,
            "test.csv",
            sep = ",",
            row.names = FALSE)
# Alternatives:
# write.csv()
# data.table::fwrite()

# Read an Excel workbook selected through the interactive file dialog
library(readxl)
# excel_sheets(file.choose())  # optional: inspect the workbook's sheets first
datal <- read_excel(path = file.choose())

# Batch import: read every file in the folder with read_excel() and stack the
# results into a single table.
pb_dir <- ".\\房地产PB\\"
files <- list.files(pb_dir)
files
paths <- paste0(pb_dir, files)
paths

# Preallocate one slot per file instead of growing the list inside the loop.
df <- vector("list", length(paths))
# seq_along() gives an empty sequence when the folder is empty, whereas
# 1:length(paths) would iterate over 1 and 0 and fail on paths[1].
for (i in seq_along(paths)) {
    datai <- read_excel(paths[i])
    # Tag every row with the source file name minus its extension. Stripping
    # the extension by name (rather than a fixed number of characters) works
    # for both ".xls" and ".xlsx".
    datai$object <- tools::file_path_sans_ext(files[i])
    df[[i]] <- datai
    print(i)  # progress indicator
}

# Row-bind all per-file tables into one
df_all <- bind_rows(df)

2. 类似数据库操作:mutate 新建变量、filter/select 过滤行/列、summarise+group_by 分组汇总、4种 join 数据合并 (管道操作符:前一步的结果可作为后一步的参数)

# dplyr single-table verbs, demonstrated on the built-in ToothGrowth data set
library(dplyr)
head(ToothGrowth)
str(ToothGrowth)

# Create new variables and overwrite an existing one.
# mutate() evaluates its arguments in order, so nv2 can use the nv created
# just before it.
toothgrowth2 <- mutate(ToothGrowth,
                       len = len^2,          # overwrite len with its square
                       nv = row_number(),    # running row index 1..n
                       nv2 = ifelse(nv > median(nv), "H", "L"))
head(toothgrowth2)

# Filter rows (observations): both conditions must hold
toothgrowth3 <- filter(toothgrowth2, nv %in% 1:50, nv2 == "H")
toothgrowth3

# Select columns (variables) by position; selection helpers such as
# starts_with() / ends_with() can be used as well
toothgrowth4 <- select(toothgrowth3, c(2, 4))
toothgrowth4

# Summaries: over the whole table, then per group
summarise(ToothGrowth, len_max = max(len), len_min = min(len))
summarise(group_by(ToothGrowth, supp), len_max = max(len))
summarise(group_by(ToothGrowth, dose, supp), len_max = max(len))

# Pipe operator: the result on the left becomes the first argument of the
# call on the right
library(magrittr)
ToothGrowth %>%
    # row_number() indexes whatever data reaches this step, so the chain does
    # not need to refer back to ToothGrowth by name
    mutate(nv = row_number()) %>%
    filter(nv %in% 1:50) %>%
    select(1:2) %>%
    group_by(supp) %>%
    summarise(len_max = max(len)) %>%
    as.data.frame()

# Joining (merging) two data frames on a key column
library(dplyr)
df1 <- data.frame(
    c1 = 2:5,
    c2 = LETTERS[2:5]
)
df1
df2 <- data.frame(
    c3 = LETTERS[c(2:3, 20:23)],
    c4 = sample(1:100, size = 6)
)
df2

# Left join, written as a plain call and as a pipe; df1$c2 is matched to df2$c3
left_join(df1, df2, by = c("c2" = "c3"))
df1 %>% left_join(df2, by = c("c2" = "c3"))

# Right join
right_join(df1, df2, by = c("c2" = "c3"))

# Full join
full_join(df1, df2, by = c("c2" = "c3"))

# Inner join
inner_join(df1, df2, by = c("c2" = "c3"))

3. separate/unite 列的分裂/合并,pivot_longer/wider 长宽数据转换

1)列的分裂/合并

# Splitting and merging columns
library(tidyr)

# Split: build a small example table, then break c5 and c6 into two columns each
df3 <- data.frame(c5 = paste(letters[1:3], 1:3, sep = "-"),
                  c6 = paste(letters[1:3], 1:3, sep = "."),
                  c4 = c("B", "B", "B"),
                  c3 = c("H", "M", "L"))
df3
# separate() treats `sep` as a regular expression, so the literal dot is
# escaped. remove = FALSE keeps the source column, remove = TRUE drops it.
# TRUE/FALSE are spelled out because `T`/`F` can be reassigned.
df4 <- df3 %>%
    separate(col = c5, sep = "-", into = c("c7", "c8"), remove = FALSE) %>%
    separate(col = c6, sep = "\\.", into = c("c9", "c10"), remove = TRUE)
df4

# Merge: paste pairs of columns back together; here `sep` is inserted literally
df4 %>%
    unite(col = "c11", c("c7", "c8"), sep = "-", remove = FALSE) %>%
    unite(col = "c12", c("c9", "c10"), sep = ".", remove = TRUE) %>%
    unite(col = "c13", c("c4", "c3"), sep = "", remove = FALSE)

2) 长宽数据转换----画图通常要先转换为长数据

# Converting between wide and long data
library(tidyr)

# Fixed seed so the randomly generated example table is reproducible
set.seed(42)
df5 <- data.frame(time = rep(2011:2013, each = 3),
                  area = rep(letters[1:3], times = 3),
                  pop = sample(100:1000, 9),
                  den = round(rnorm(9, mean = 3, sd = 0.1), 2),
                  mj = sample(8:12, 9, replace = TRUE))
df5

# Wide to long: stack the three measurement columns into name/value pairs.
# Columns are selected by name so the call keeps working if the column
# order of df5 changes.
df6 <- df5 %>%
    pivot_longer(cols = c(pop, den, mj),
                 names_to = "varb",
                 values_to = "value")
df6

# Long to wide: one column per area/variable combination, one row per time
df7 <- df6 %>%
    pivot_wider(names_from = c(area, varb),
                values_from = value)
df7

二. 可视化作图

三. 统计应用

你可能感兴趣的:(r语言,开发语言)