处理字符串
grep、grepl 和 regexpr 函数都能找到与模式相匹配的字符串;sub 和 gsub 函数能替换匹配的字符串。
加载 stringr 包;fixed() 里面为要按字面匹配的字符串,str_detect 返回一个逻辑向量,标明每个元素是否匹配,可用它来取出匹配的行。
> library(stringr) > multiple <- str_detect(english_monarchs$domain,fixed(",")) > english_monarchs[multiple,c("name","domain")] name domain 17 Offa East Anglia, Mercia 18 Offa East Anglia, Kent, Mercia 19 Offa and Ecgfrith East Anglia, Kent, Mercia 20 Ecgfrith East Anglia, Kent, Mercia 22 C<U+009C>nwulf East Anglia, Kent, Mercia 23 C<U+009C>nwulf and Cynehelm East Anglia, Kent, Mercia 24 C<U+009C>nwulf East Anglia, Kent, Mercia 25 Ceolwulf East Anglia, Kent, Mercia 26 Beornwulf East Anglia, Mercia 82 Ecgbehrt and <U+00C6>thelwulf Kent, Wessex 83 Ecgbehrt and <U+00C6>thelwulf Kent, Mercia, Wessex 84 Ecgbehrt and <U+00C6>thelwulf Kent, Wessex 85 <U+00C6>thelwulf and <U+00C6>eelstan I Kent, Wessex 86 <U+00C6>thelwulf Kent, Wessex 87 <U+00C6>thelwulf and <U+00C6>eelberht III Kent, Wessex 88 <U+00C6>eelberht III Kent, Wessex 89 <U+00C6>thelred I Kent, Wessex 95 Oswiu Mercia, Northumbria
> ruler <- str_detect(english_monarchs$name,",|and") > english_monarchs[ruler & !is.na(ruler)]
若要把 name 一列按分隔符拆分开,则可以使用 str_split 函数。
> indival <- str_split(english_monarchs$name,",|and") > head(indival[sapply(indival,length)>1]) [[1]] [1] "Sigeberht " " Ecgric" [[2]] [1] "Hun" " Beonna " " Alberht" [[3]] [1] "Offa " " Ecgfrith" [[4]] [1] "C\u009cnwulf " " Cynehelm" [[5]] [1] "Sighere " " Sebbi" [[6]] [1] "Sigeheard " " Swaefred"
> str_count(english_monarchs$name,th)
用 ignore.case 包装模式,可以在匹配时忽略大小写(新版 stringr 中应改用 regex(pattern, ignore_case = TRUE))。