前文学习了R基础的字符串操作函数,这次来学习 stringr
提供的函数。
library(stringr)
eA <- c("eXample-Strinng", "UPPER", "lower")
检测子字符串
- str_detect(string, pattern, negate = FALSE)
- str_which(string, pattern, negate = FALSE)
- str_count(string, pattern = "")
- str_locate(string, pattern)
- str_locate_all(string, pattern)
- str_starts(string, pattern, negate = FALSE)
- str_ends(string, pattern, negate = FALSE)
函数 str_detect
检测是否包含子字符串;str_which
返回检测到子字符串的向量索引;str_count
显示子字符串数目; str_locate
显示子字符串首次出现的开始与结束位置(str_locate_all 则返回所有出现的位置);函数 str_starts
和 str_ends
分别检测字符串开头结尾。
> str_detect(eA, "Str")
[1] TRUE FALSE FALSE
> str_which(eA, "Str")
[1] 1
> str_count(eA, "r")
[1] 1 0 1
> str_locate(eA, "Str")
start end
[1,] 9 11
[2,] NA NA
[3,] NA NA
> str_locate_all(eA, "Str")
[[1]]
start end
[1,] 9 11
[[2]]
start end
[[3]]
start end
提取子字符串
- str_sub(string, start = 1L, end = -1L)
- str_subset(string, pattern, negate = FALSE)
- str_extract(string, pattern)
- str_extract_all(string, pattern, simplify = FALSE)
- str_match(string, pattern)
- str_match_all(string, pattern)
函数 str_sub
按位置取子字符串;负数位置表示从字符串末尾向前计数(-1 为最后一个字符);如果结束位置 end 为0会返回空字符串;函数 str_subset
则是返回包含子字符串的字符串;函数 str_extract
会返回第一个匹配的字符串(str_extract_all 返回全部匹配);函数 str_match
返回多列匹配结果,第一列是整体匹配结果,后面是每一组(正则)匹配结果。
> str_sub(eA, start = 2, end = 6)
[1] "Xampl" "PPER" "ower"
> str_sub(eA, start = -5, end = -1)
[1] "rinng" "UPPER" "lower"
> str_sub(eA, start = -5, end = 0)
[1] "" "" ""
> str_subset(eA, "r")
[1] "eXample-Strinng" "lower"
> str_subset(eA, "R")
[1] "UPPER"
> str_extract(eA, "[a-p]{2,4}")
[1] "ampl" NA "lo"
> str_extract_all(eA, "[a-p]{2,4}")
[[1]]
[1] "ampl" "inng"
[[2]]
character(0)
[[3]]
[1] "lo"
> str_match(eA, "(\\w{3})(-)(\\w{3})")
[,1] [,2] [,3] [,4]
[1,] "ple-Str" "ple" "-" "Str"
[2,] NA NA NA NA
[3,] NA NA NA NA
> str_match_all(eA, "(\\w{3})(-)(\\w{3})")
[[1]]
[,1] [,2] [,3] [,4]
[1,] "ple-Str" "ple" "-" "Str"
[[2]]
[,1] [,2] [,3] [,4]
[[3]]
[,1] [,2] [,3] [,4]
字符串长度
- str_length(string)
- str_pad(string, width, side = c("left", "right", "both"), pad = " ")
- str_trunc(string, width, side = c("right", "left", "center"), ellipsis = "...")
- str_trim(string, side = c("both", "left", "right"))
函数 str_length
返回字符串长度;函数 str_pad
是用指定字符填充字符串到目标长度;函数 str_trunc
是截断字符串到一定长度;函数 str_trim
移除字符串两端的空白符(可用 side 指定只处理左端或右端)。
> str_length(eA)
[1] 15 5 5
> str_pad(eA, width = 20, side = "left", pad = "=")
[1] "=====eXample-Strinng" "===============UPPER" "===============lower"
> str_pad(eA, width = 20, side = "right", pad = "=")
[1] "eXample-Strinng=====" "UPPER===============" "lower==============="
> str_pad(eA, width = 20, side = "both", pad = "=")
[1] "==eXample-Strinng===" "=======UPPER========" "=======lower========"
> str_trunc(eA, width = 5, side = "right")
[1] "eX..." "UPPER" "lower"
> str_trunc(eA, width = 5, side = "left")
[1] "...ng" "UPPER" "lower"
> str_trunc(eA, width = 5, side = "left", ellipsis = "+++")
[1] "+++ng" "UPPER" "lower"
修改字符串
- str_sub(string, start = 1L, end = -1L, omit_na = FALSE) <- value
- str_replace(string, pattern, replacement)
- str_replace_all(string, pattern, replacement)
- str_to_upper(string, locale = "en")
- str_to_lower(string, locale = "en")
函数 str_sub
的赋值形式(str_sub<-)会直接修改原字符串,按照位置替换新内容;函数 str_replace
只替换第一个匹配的子字符串,str_replace_all 则替换所有匹配;函数 str_to_upper
和 str_to_lower
是大小写转换。
> str_sub(eA, start = 1, end = 5) <- "New"
> eA
[1] "Newle-Strinng" "New" "New"
> eA <- c("eXample-Strinng", "UPPER", "lower")  # 恢复原始向量,后续示例仍使用原始的 eA
> str_replace(eA[1], pattern = "nn", replacement = "n")
[1] "eXample-String"
> str_to_upper(eA)
[1] "EXAMPLE-STRINNG" "UPPER" "LOWER"
> str_to_lower(eA)
[1] "example-strinng" "upper" "lower"
拼接与拆分
- str_c(..., sep = "", collapse = NULL)
- str_dup(string, times)
- str_split(string, pattern, n = Inf, simplify = FALSE)
- str_split_fixed(string, pattern, n)
- str_glue(..., .sep = "", .envir = parent.frame())
函数 str_c
跟 paste0(即 sep = "" 的 paste)
行为很类似,用于拼接字符串;函数 str_dup
重复多次字符串;函数 str_split
用于拆分字符串;函数 str_glue
很像 Python 的 f-string,可以把花括号 {} 中的变量(或表达式)的值插入到字符串中。
> str_c(eA, collapse = "+")
[1] "eXample-Strinng+UPPER+lower"
> str_dup(eA, 3)
[1] "eXample-StrinngeXample-StrinngeXample-Strinng"
[2] "UPPERUPPERUPPER"
[3] "lowerlowerlower"
> str_split(eA, pattern = "-")
[[1]]
[1] "eXample" "Strinng"
[[2]]
[1] "UPPER"
[[3]]
[1] "lower"
> Name <- "Matt"
> str_glue("My name is {Name}")
My name is Matt