R字符串处理2

1 、连接字符串：str_c()

str_c(..., sep = "", collapse = NULL)

sep：连接两个字符之间插入的符号
collapse：若不为 NULL，则用该字符串把连接结果的所有元素合并成一个字符串

示例

> str_c("Letter: ", letters)
 [1] "Letter: a" "Letter: b" "Letter: c" "Letter: d" "Letter: e" "Letter: f" "Letter: g" "Letter: h"
 [9] "Letter: i" "Letter: j" "Letter: k" "Letter: l" "Letter: m" "Letter: n" "Letter: o" "Letter: p"
[17] "Letter: q" "Letter: r" "Letter: s" "Letter: t" "Letter: u" "Letter: v" "Letter: w" "Letter: x"
[25] "Letter: y" "Letter: z"

> str_c("Letter", letters, sep = ": ")
 [1] "Letter: a" "Letter: b" "Letter: c" "Letter: d" "Letter: e" "Letter: f" "Letter: g" "Letter: h"
 [9] "Letter: i" "Letter: j" "Letter: k" "Letter: l" "Letter: m" "Letter: n" "Letter: o" "Letter: p"
[17] "Letter: q" "Letter: r" "Letter: s" "Letter: t" "Letter: u" "Letter: v" "Letter: w" "Letter: x"
[25] "Letter: y" "Letter: z"

> str_c(letters, " is for", "...")
 [1] "a is for..." "b is for..." "c is for..." "d is for..." "e is for..." "f is for..." "g is for..."
 [8] "h is for..." "i is for..." "j is for..." "k is for..." "l is for..." "m is for..." "n is for..."
[15] "o is for..." "p is for..." "q is for..." "r is for..." "s is for..." "t is for..." "u is for..."
[22] "v is for..." "w is for..." "x is for..." "y is for..." "z is for..."

> str_c(letters[-26], " comes before ", letters[-1])
 [1] "a comes before b" "b comes before c" "c comes before d" "d comes before e" "e comes before f"
 [6] "f comes before g" "g comes before h" "h comes before i" "i comes before j" "j comes before k"
[11] "k comes before l" "l comes before m" "m comes before n" "n comes before o" "o comes before p"
[16] "p comes before q" "q comes before r" "r comes before s" "s comes before t" "t comes before u"
[21] "u comes before v" "v comes before w" "w comes before x" "x comes before y" "y comes before z"
> str_c(letters, collapse = "")
[1] "abcdefghijklmnopqrstuvwxyz"

> str_c(letters, collapse = ", ")
[1] "a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u, v, w, x, y, z"

> str_c(c("a", NA, "b"), "-d")
[1] "a-d"  "NA-d" "b-d"

2 、字符串计数：str_count()

str_count(string, pattern = "")

string：字符串
pattern：对哪个字符进行计数

示例

> fruit <- c("apple", "banana", "pear", "pineapple")
> str_count(fruit, "a")
[1] 1 3 1 1
> str_count(fruit, "p")
[1] 2 0 1 3
> str_count(fruit, "e")
[1] 1 0 1 2
> str_count(fruit, c("a", "b", "p", "p"))
[1] 1 1 1 3
> str_count(c("a.", "...", ".a.a"), ".")
[1] 2 3 4
> str_count(c("a.", "...", ".a.a"), fixed("."))
[1] 1 3 2

3 、字符串逻辑判断：str_detect()

str_detect(string, pattern = "")

string：字符串
pattern：对哪个字符进行逻辑判断

示例

> fruit <- c("apple", "banana", "pear", "pinapple")
> str_detect(fruit, "a")
[1] TRUE TRUE TRUE TRUE
> str_detect(fruit, "^a")
[1]  TRUE FALSE FALSE FALSE
> str_detect(fruit, "a$")
[1] FALSE  TRUE FALSE FALSE
> str_detect(fruit, "b")
[1] FALSE  TRUE FALSE FALSE
> str_detect(fruit, "[aeiou]")
[1] TRUE TRUE TRUE TRUE
> # Also vectorised over pattern
> str_detect("aecfg", letters)
 [1]  TRUE FALSE  TRUE FALSE  TRUE  TRUE  TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[17] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE

4 、复制字符串：str_dup()

str_dup(string, times)

string：字符串
times：复制的次数

示例

> fruit <- c("apple", "pear", "banana")
> str_dup(fruit, 2)
[1] "appleapple"   "pearpear"     "bananabanana"
> str_dup(fruit, 1:3)
[1] "apple"              "pearpear"           "bananabananabanana"
> str_c("ba", str_dup("na", 0:5))
[1] "ba"           "bana"         "banana"       "bananana"     "banananana"   "bananananana"

5 、从字符串中提取匹配字符：str_extract()

str_extract(string, pattern)
str_extract_all(string, pattern, simplify = FALSE)

string:字符串
pattern：要匹配的模式，默认按正则表达式解释；如需按字面匹配，可用 fixed() 包裹

示例


> shopping_list <- c("apples x4", "bag of flour", "bag of sugar", "milk x2")
> str_extract(shopping_list, "\\d")
[1] "4" NA  NA  "2"
> str_extract(shopping_list, "[a-z]+")
[1] "apples" "bag"    "bag"    "milk"  
> str_extract(shopping_list, "[a-z]{1,4}")
[1] "appl" "bag"  "bag"  "milk"
> str_extract(shopping_list, "\\b[a-z]{1,4}\\b")
[1] NA     "bag"  "bag"  "milk"
> str_extract_all(shopping_list, "[a-z]+")
[[1]]
[1] "apples" "x"     

[[2]]
[1] "bag"   "of"    "flour"

[[3]]
[1] "bag"   "of"    "sugar"

[[4]]
[1] "milk" "x"   

> str_extract_all(shopping_list, "\\b[a-z]+\\b")
[[1]]
[1] "apples"

[[2]]
[1] "bag"   "of"    "flour"

[[3]]
[1] "bag"   "of"    "sugar"

[[4]]
[1] "milk"

> str_extract_all(shopping_list, "\\d")
[[1]]
[1] "4"

[[2]]
character(0)

[[3]]
character(0)

[[4]]
[1] "2"

6 、字符串的长度：str_length()

str_length(string)

string:字符串

示例


> str_length(letters)
 [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
> str_length(NA)
[1] NA
> str_length(factor("abc"))
[1] 3
> str_length(c("i", "like", "programming", NA))
[1]  1  4 11 NA
> # Two ways of representing a u with an umlaut
> u1 <- "\u00fc"
> u2 <- stringi::stri_trans_nfd(u1)
> # They print the same:
> u1
[1] "ü"
> u2
[1] "ü"
> # But have a different length
> str_length(u1)
[1] 1
> str_length(u2)
[1] 2

7 、找到匹配的字符串的位置：str_locate()

str_locate(string, pattern)
str_locate_all(string, pattern)

string:字符串
pattern：要匹配的模式，默认按正则表达式解释；如需按字面匹配，可用 fixed() 包裹

示例

> fruit <- c("apple", "banana", "pear", "pineapple")
> str_locate(fruit, "$")
     start end
[1,]     6   5
[2,]     7   6
[3,]     5   4
[4,]    10   9
> str_locate(fruit, "a")
     start end
[1,]     1   1
[2,]     2   2
[3,]     3   3
[4,]     5   5
> str_locate(fruit, "e")
     start end
[1,]     5   5
[2,]    NA  NA
[3,]     2   2
[4,]     4   4
> str_locate(fruit, c("a", "b", "p", "p"))
     start end
[1,]     1   1
[2,]     1   1
[3,]     1   1
[4,]     1   1
> str_locate_all(fruit, "a")
[[1]]
     start end
[1,]     1   1

[[2]]
     start end
[1,]     2   2
[2,]     4   4
[3,]     6   6

[[3]]
     start end
[1,]     3   3

[[4]]
     start end
[1,]     5   5

> str_locate_all(fruit, "e")
[[1]]
     start end
[1,]     5   5

[[2]]
     start end

[[3]]
     start end
[1,]     2   2

[[4]]
     start end
[1,]     4   4
[2,]     9   9

> str_locate_all(fruit, "")
[[1]]
     start end
[1,]     1   0
[2,]     2   1
[3,]     3   2
[4,]     4   3
[5,]     5   4

[[2]]
     start end
[1,]     1   0
[2,]     2   1
[3,]     3   2
[4,]     4   3
[5,]     5   4
[6,]     6   5

[[3]]
     start end
[1,]     1   0
[2,]     2   1
[3,]     3   2
[4,]     4   3

[[4]]
      start end
 [1,]     1   0
 [2,]     2   1
 [3,]     3   2
 [4,]     4   3
 [5,]     5   4
 [6,]     6   5
 [7,]     7   6
 [8,]     8   7
 [9,]     9   8

8 、从字符串中提取匹配组：str_match()

str_match(string, pattern)
str_match_all(string, pattern)

string：字符串
pattern：要匹配的模式，默认按正则表达式解释；如需按字面匹配，可用 fixed() 包裹

示例


> strings <- c(" 219 733 8965", "329-293-8753 ", "banana", "595 794 7569",
+ "387 287 6718", "apple", "233.398.9187 ", "482 952 3315",
+ "239 923 8115 and 842 566 4692", "Work: 579-499-7527", "$1000",
+ "Home: 543.355.3679")
> phone <- "([2-9][0-9]{2})[- .]([0-9]{3})[- .]([0-9]{4})"
> str_extract(strings, phone)
 [1] "219 733 8965" "329-293-8753" NA             "595 794 7569" "387 287 6718" NA            
 [7] "233.398.9187" "482 952 3315" "239 923 8115" "579-499-7527" NA             "543.355.3679"
> str_match(strings, phone)
      [,1]           [,2]  [,3]  [,4]  
 [1,] "219 733 8965" "219" "733" "8965"
 [2,] "329-293-8753" "329" "293" "8753"
 [3,] NA             NA    NA    NA    
 [4,] "595 794 7569" "595" "794" "7569"
 [5,] "387 287 6718" "387" "287" "6718"
 [6,] NA             NA    NA    NA    
 [7,] "233.398.9187" "233" "398" "9187"
 [8,] "482 952 3315" "482" "952" "3315"
 [9,] "239 923 8115" "239" "923" "8115"
[10,] "579-499-7527" "579" "499" "7527"
[11,] NA             NA    NA    NA    
[12,] "543.355.3679" "543" "355" "3679"
> str_extract_all(strings, phone)
[[1]]
[1] "219 733 8965"

[[2]]
[1] "329-293-8753"

[[3]]
character(0)

[[4]]
[1] "595 794 7569"

[[5]]
[1] "387 287 6718"

[[6]]
character(0)

[[7]]
[1] "233.398.9187"

[[8]]
[1] "482 952 3315"

[[9]]
[1] "239 923 8115" "842 566 4692"

[[10]]
[1] "579-499-7527"

[[11]]
character(0)

[[12]]
[1] "543.355.3679"

> str_match_all(strings, phone)
[[1]]
     [,1]           [,2]  [,3]  [,4]  
[1,] "219 733 8965" "219" "733" "8965"

[[2]]
     [,1]           [,2]  [,3]  [,4]  
[1,] "329-293-8753" "329" "293" "8753"

[[3]]
character(0)

[[4]]
     [,1]           [,2]  [,3]  [,4]  
[1,] "595 794 7569" "595" "794" "7569"

[[5]]
     [,1]           [,2]  [,3]  [,4]  
[1,] "387 287 6718" "387" "287" "6718"

[[6]]
character(0)

[[7]]
     [,1]           [,2]  [,3]  [,4]  
[1,] "233.398.9187" "233" "398" "9187"

[[8]]
     [,1]           [,2]  [,3]  [,4]  
[1,] "482 952 3315" "482" "952" "3315"

[[9]]
     [,1]           [,2]  [,3]  [,4]  
[1,] "239 923 8115" "239" "923" "8115"
[2,] "842 566 4692" "842" "566" "4692"

[[10]]
     [,1]           [,2]  [,3]  [,4]  
[1,] "579-499-7527" "579" "499" "7527"

[[11]]
character(0)

[[12]]
     [,1]           [,2]  [,3]  [,4]  
[1,] "543.355.3679" "543" "355" "3679"

> x <- c("<a> <b>", "<a> <>", "<a>", "", NA)
> str_match(x, "<(.*?)> <(.*?)>")
     [,1]      [,2] [,3]
[1,] "<a> <b>" "a"  "b" 
[2,] "<a> <>"  "a"  ""  
[3,] NA        NA   NA  
[4,] NA        NA   NA  
[5,] NA        NA   NA  
> str_match_all(x, "<(.*?)>")
[[1]]
     [,1]  [,2]
[1,] "<a>" "a" 
[2,] "<b>" "b" 

[[2]]
     [,1]  [,2]
[1,] "<a>" "a" 
[2,] "<>"  ""  

[[3]]
     [,1]  [,2]
[1,] "<a>" "a" 

[[4]]
character(0)

[[5]]
character(0)

> str_extract(x, "<.*?>")
[1] "<a>" "<a>" "<a>" NA    NA   
> str_extract_all(x, "<.*?>")
[[1]]
[1] "<a>" "<b>"

[[2]]
[1] "<a>" "<>" 

[[3]]
[1] "<a>"

[[4]]
character(0)

[[5]]
character(0)

9 、字符串增加空字符：str_pad()

str_pad(string, width, side = c("left", "right", "both"), pad = " ")

string：字符向量
width：填充后字符串的最小宽度
side：在左边、右边还是两边填充
pad：用于填充的单个字符，默认为空格

示例

> str_pad("conan", 20, "left")
[1] "               conan"
> # 从右边补充空格，直到字符串长度为20
> str_pad("conan", 20, "right")
[1] "conan               "
> # 从左右两边各补充空格，直到字符串长度为20
> str_pad("conan", 20, "both")
[1] "       conan        "
> # 从左右两边各补充x字符，直到字符串长度为20
> str_pad("conan", 20, "both",'x')
[1] "xxxxxxxconanxxxxxxxx"

10 、替换字符串：str_replace()

str_replace(string, pattern, replacement)
str_replace_all(string, pattern, replacement)

string：字符向量
pattern：要匹配的模式，默认按正则表达式解释；如需按字面匹配，可用 fixed() 包裹

示例

> fruits <- c("one apple", "two pears", "three bananas")
> str_replace(fruits, "[aeiou]", "-")
[1] "-ne apple"     "tw- pears"     "thr-e bananas"
> str_replace_all(fruits, "[aeiou]", "-")
[1] "-n- -ppl-"     "tw- p--rs"     "thr-- b-n-n-s"
> str_replace(fruits, "([aeiou])", "")
[1] "ne apple"     "tw pears"     "thre bananas"
> str_replace(fruits, "([aeiou])", "\\1\\1")
[1] "oone apple"     "twoo pears"     "threee bananas"
> str_replace(fruits, "[aeiou]", c("1", "2", "3"))
[1] "1ne apple"     "tw2 pears"     "thr3e bananas"
> str_replace(fruits, c("a", "e", "i"), "-")
[1] "one -pple"     "two p-ars"     "three bananas"
> fruits <- c("one apple", "two pears", "three bananas")
> str_replace(fruits, "[aeiou]", "-")
[1] "-ne apple"     "tw- pears"     "thr-e bananas"
> str_replace_all(fruits, "[aeiou]", "-")
[1] "-n- -ppl-"     "tw- p--rs"     "thr-- b-n-n-s"
> str_replace_all(fruits, "([aeiou])", "")
[1] "n ppl"    "tw prs"   "thr bnns"
> str_replace_all(fruits, "([aeiou])", "\\1\\1")
[1] "oonee aapplee"      "twoo peeaars"       "threeee baanaanaas"
> str_replace_all(fruits, "[aeiou]", c("1", "2", "3"))
[1] "1n1 1ppl1"     "tw2 p22rs"     "thr33 b3n3n3s"
> str_replace_all(fruits, c("a", "e", "i"), "-")
[1] "one -pple"     "two p-ars"     "three bananas"

11 、分割字符串：str_split()

str_split(string, pattern, n = Inf, simplify = FALSE)
str_split_fixed(string, pattern, n)

string：字符向量
pattern：要匹配的模式，默认按正则表达式解释；如需按字面匹配，可用 fixed() 包裹
simplify：如果值为FALSE，返回由字符向量组成的列表；如果值为TRUE，返回字符矩阵

示例

> fruits <- c(
+ "apples and oranges and pears and bananas",
+ "pineapples and mangos and guavas"
+ )
> str_split(fruits, " and ")
[[1]]
[1] "apples"  "oranges" "pears"   "bananas"

[[2]]
[1] "pineapples" "mangos"     "guavas"

> str_split(fruits, " and ", n = 3)
[[1]]
[1] "apples"            "oranges"           "pears and bananas"

[[2]]
[1] "pineapples" "mangos"     "guavas"

> str_split(fruits, " and ", n = 2)
[[1]]
[1] "apples"                        "oranges and pears and bananas"

[[2]]
[1] "pineapples"        "mangos and guavas"

> # If n greater than number of pieces, no padding occurs
> str_split(fruits, " and ", n = 5)
[[1]]
[1] "apples"  "oranges" "pears"   "bananas"

[[2]]
[1] "pineapples" "mangos"     "guavas"

> # Use fixed to return a character matrix
> str_split_fixed(fruits, " and ", 3)
     [,1]         [,2]      [,3]
[1,] "apples"     "oranges" "pears and bananas"
[2,] "pineapples" "mangos"  "guavas"
> str_split_fixed(fruits, " and ", 4)
     [,1]         [,2]      [,3]     [,4]
[1,] "apples"     "oranges" "pears"  "bananas"
[2,] "pineapples" "mangos"  "guavas" ""

12 、截取字符串：str_sub()

str_sub(string, start = 1L, end = -1L)
str_sub(string, start = 1L, end = -1L) <- value

string：字符向量
start：开始位置
end：结束位置

示例

> hw <- "Hadley Wickham"
> str_sub(hw, 1, 6)
[1] "Hadley"
> str_sub(hw, end = 6)
[1] "Hadley"
> str_sub(hw, 8, 14)
[1] "Wickham"
> str_sub(hw, 8)
[1] "Wickham"
> str_sub(hw, c(1, 8), c(6, 14))
[1] "Hadley"  "Wickham"
> str_sub(hw, -1)
[1] "m"
> str_sub(hw, -7)
[1] "Wickham"
> str_sub(hw, end = -7)
[1] "Hadley W"
> # Alternatively, you can pass in a two column matrix, as in the
> # output from str_locate_all
> pos <- str_locate_all(hw, "[aeio]")[[1]]
> str_sub(hw, pos)
[1] "adley Wickham" "ey Wickham"    "ickham"        "am"            "adley Wickham" "ey Wickham"
[7] "ickham"        "am"
> str_sub(hw, pos[, 1], pos[, 2])
[1] "a" "e" "i" "a"
> # Vectorisation
> str_sub(hw, seq_len(str_length(hw)))
 [1] "Hadley Wickham" "adley Wickham"  "dley Wickham"   "ley Wickham"    "ey Wickham"
 [6] "y Wickham"      " Wickham"       "Wickham"        "ickham"         "ckham"
[11] "kham"           "ham"            "am"             "m"
> str_sub(hw, end = seq_len(str_length(hw)))
 [1] "H"              "Ha"             "Had"            "Hadl"           "Hadle"
 [6] "Hadley"         "Hadley "        "Hadley W"       "Hadley Wi"      "Hadley Wic"
[11] "Hadley Wick"    "Hadley Wickh"   "Hadley Wickha"  "Hadley Wickham"
> # Replacement form
> x <- "BBCDEF"
> str_sub(x, 1, 1) <- "A"; x
[1] "ABCDEF"
> str_sub(x, -1, -1) <- "K"; x
[1] "ABCDEK"
> str_sub(x, -2, -2) <- "GHIJ"; x
[1] "ABCDGHIJK"
> str_sub(x, 2, -2) <- ""; x
[1] "AK"

13 、删除空字符串：str_trim()

str_trim(string, side = c("both", "left", "right"))

string：字符向量
side：删除左边、右边还是两边的空白字符（空格、制表符、换行符等）

示例

> str_trim(" String with trailing and leading white space\t")
[1] "String with trailing and leading white space"
> str_trim("\n\nString with trailing and leading white space\n\n")
[1] "String with trailing and leading white space"

R字符串处理2

1 、连接字符串：str_c()

2 、字符串计数：str_count()

3 、字符串逻辑判断：str_detect()

4 、复制字符串：str_dup()

5 、从字符串中提取匹配字符：str_extract()

6 、字符串的长度：str_length()

7 、找到匹配的字符串的位置：str_locate()

8 、从字符串中提取匹配组：str_match()

9 、字符串增加空字符：str_pad()

10 、替换字符串：str_replace()

11 、分割字符串：str_split()

12 、截取字符串：str_sub()

13 、删除空字符串：str_trim()

你可能感兴趣的:(R字符串处理2)