R字符串操作(二)

前文学习了R基础的字符串操作函数,这次来学习 stringr 提供的函数。

library(stringr)
eA <- c("eXample-Strinng", "UPPER", "lower")

检测子字符串

  • str_detect(string, pattern, negate = FALSE)
  • str_which(string, pattern, negate = FALSE)
  • str_count(string, pattern = "")
  • str_locate(string, pattern)
  • str_locate_all(string, pattern)
  • str_starts(string, pattern, negate = FALSE)
  • str_ends(string, pattern, negate = FALSE)

函数 str_detect 检测是否包含子字符串;str_which 返回检测到子字符串的向量索引;str_count 返回子字符串出现的次数;str_locate 返回第一个匹配的开始与结束位置,str_locate_all 则返回所有匹配的位置;函数 str_starts 和 str_ends 分别检测字符串的开头和结尾是否匹配。

> str_detect(eA, "Str")
[1]  TRUE FALSE FALSE

> str_which(eA, "Str")
[1] 1

> str_count(eA, "r")
[1] 1 0 1

> str_locate(eA, "Str")
     start end
[1,]     9  11
[2,]    NA  NA
[3,]    NA  NA
> str_locate_all(eA, "Str")
[[1]]
     start end
[1,]     9  11

[[2]]
     start end

[[3]]
     start end

提取子字符串

  • str_sub(string, start = 1L, end = -1L)
  • str_subset(string, pattern, negate = FALSE)
  • str_extract(string, pattern)
  • str_extract_all(string, pattern, simplify = FALSE)
  • str_match(string, pattern)
  • str_match_all(string, pattern)

函数 str_sub 按位置取子字符串,位置为负数时表示从字符串末尾倒数;如果结束位置 end 为 0(见下例),会返回空字符串;函数 str_subset 返回包含子字符串的那些字符串;函数 str_extract 返回第一个匹配到的内容,str_extract_all 返回所有匹配;函数 str_match 返回多列匹配结果,第一列是整体匹配结果,后面各列是每个(正则)捕获组的匹配结果。

> str_sub(eA, start = 2, end = 6)
[1] "Xampl" "PPER"  "ower" 
> str_sub(eA, start = -5, end = -1)
[1] "rinng" "UPPER" "lower"
> str_sub(eA, start = -5, end = 0)
[1] "" "" ""

> str_subset(eA, "r")
[1] "eXample-Strinng" "lower"          
> str_subset(eA, "R")
[1] "UPPER"

> str_extract(eA, "[a-p]{2,4}")
[1] "ampl" NA     "lo"  
> str_extract_all(eA, "[a-p]{2,4}")
[[1]]
[1] "ampl" "inng"

[[2]]
character(0)

[[3]]
[1] "lo"

> str_match(eA, "(\\w{3})(-)(\\w{3})")
     [,1]      [,2]  [,3] [,4] 
[1,] "ple-Str" "ple" "-"  "Str"
[2,] NA        NA    NA   NA   
[3,] NA        NA    NA   NA   
> str_match_all(eA, "(\\w{3})(-)(\\w{3})")
[[1]]
     [,1]      [,2]  [,3] [,4] 
[1,] "ple-Str" "ple" "-"  "Str"

[[2]]
     [,1] [,2] [,3] [,4]

[[3]]
     [,1] [,2] [,3] [,4]

字符串长度

  • str_length(string)
  • str_pad(string, width, side = c("left", "right", "both"), pad = " ")
  • str_trunc(string, width, side = c("right", "left", "center"), ellipsis = "...")
  • str_trim(string, side = c("both", "left", "right"))

函数 str_length 返回字符串长度;函数 str_pad 是用指定字符填充字符串到目标长度;函数 str_trunc 是截断字符串到一定长度;函数 str_trim 移除字符串两端的空白符。

> str_length(eA)
[1] 15  5  5

> str_pad(eA, width = 20, side = "left", pad = "=")
[1] "=====eXample-Strinng" "===============UPPER" "===============lower"
> str_pad(eA, width = 20, side = "right", pad = "=")
[1] "eXample-Strinng=====" "UPPER===============" "lower==============="
> str_pad(eA, width = 20, side = "both", pad = "=")
[1] "==eXample-Strinng===" "=======UPPER========" "=======lower========"

> str_trunc(eA, width = 5, side = "right")
[1] "eX..." "UPPER" "lower"
> str_trunc(eA, width = 5, side = "left")
[1] "...ng" "UPPER" "lower"
> str_trunc(eA, width = 5, side = "left", ellipsis = "+++")
[1] "+++ng" "UPPER" "lower"

修改字符串

  • str_sub(string, start = 1L, end = -1L, omit_na = FALSE) <- value
  • str_replace(string, pattern, replacement)
  • str_replace_all(string, pattern, replacement)
  • str_to_upper(string, locale = "en")
  • str_to_lower(string, locale = "en")

赋值形式的 str_sub<- 会直接修改原字符串,按照位置替换为新内容;函数 str_replace 替换第一个匹配的子字符串,str_replace_all 替换所有匹配;函数 str_to_upper 和 str_to_lower 用于大小写转换。

> str_sub(eA, start = 1, end = 5) <- "New"
> eA
[1] "Newle-Strinng" "New"           "New"

注意:上面的赋值已经修改了 eA,后面的示例输出均基于原始的 eA,继续之前需要重新赋值:

> eA <- c("eXample-Strinng", "UPPER", "lower")

> str_replace(eA[1], pattern = "nn", replacement = "n")
[1] "eXample-String"

> str_to_upper(eA)
[1] "EXAMPLE-STRINNG" "UPPER"           "LOWER"          
> str_to_lower(eA)
[1] "example-strinng" "upper"           "lower"

拼接与拆分

  • str_c(..., sep = "", collapse = NULL)
  • str_dup(string, times)
  • str_split(string, pattern, n = Inf, simplify = FALSE)
  • str_split_fixed(string, pattern, n)
  • str_glue(..., .sep = "", .envir = parent.frame())

函数 str_c 与 paste 行为很类似,用于拼接字符串;函数 str_dup 将字符串重复多次;函数 str_split 用于拆分字符串;函数 str_glue 很像 Python 的 f-string,可以把变量的值插入到字符串中。

> str_c(eA, collapse = "+")
[1] "eXample-Strinng+UPPER+lower"

> str_dup(eA, 3)
[1] "eXample-StrinngeXample-StrinngeXample-Strinng"
[2] "UPPERUPPERUPPER"                              
[3] "lowerlowerlower"

> str_split(eA, pattern = "-")
[[1]]
[1] "eXample" "Strinng"

[[2]]
[1] "UPPER"

[[3]]
[1] "lower"

> Name <- "Matt"
> str_glue("My name is {Name}")
My name is Matt

你可能感兴趣的:(R字符串操作(二))