> DNA_BASES
[1] "A" "C" "G" "T"
> RNA_BASES
[1] "A" "C" "G" "U"
> GENETIC_CODE
TTT TTC TTA TTG TCT TCC TCA TCG TAT TAC TAA TAG TGT TGC TGA TGG CTT
"F" "F" "L" "L" "S" "S" "S" "S" "Y" "Y" "*" "*" "C" "C" "*" "W" "L"
CTC CTA CTG CCT CCC CCA CCG CAT CAC CAA CAG CGT CGC CGA CGG ATT ATC
"L" "L" "L" "P" "P" "P" "P" "H" "H" "Q" "Q" "R" "R" "R" "R" "I" "I"
ATA ATG ACT ACC ACA ACG AAT AAC AAA AAG AGT AGC AGA AGG GTT GTC GTA
"I" "M" "T" "T" "T" "T" "N" "N" "K" "K" "S" "S" "R" "R" "V" "V" "V"
GTG GCT GCC GCA GCG GAT GAC GAA GAG GGT GGC GGA GGG
"V" "A" "A" "A" "A" "D" "D" "E" "E" "G" "G" "G" "G"
attr(,"alt_init_codons")
[1] "TTG" "CTG"
> RNA_GENETIC_CODE
UUU UUC UUA UUG UCU UCC UCA UCG UAU UAC UAA UAG UGU UGC UGA UGG CUU
"F" "F" "L" "L" "S" "S" "S" "S" "Y" "Y" "*" "*" "C" "C" "*" "W" "L"
CUC CUA CUG CCU CCC CCA CCG CAU CAC CAA CAG CGU CGC CGA CGG AUU AUC
"L" "L" "L" "P" "P" "P" "P" "H" "H" "Q" "Q" "R" "R" "R" "R" "I" "I"
AUA AUG ACU ACC ACA ACG AAU AAC AAA AAG AGU AGC AGA AGG GUU GUC GUA
"I" "M" "T" "T" "T" "T" "N" "N" "K" "K" "S" "S" "R" "R" "V" "V" "V"
GUG GCU GCC GCA GCG GAU GAC GAA GAG GGU GGC GGA GGG
"V" "A" "A" "A" "A" "D" "D" "E" "E" "G" "G" "G" "G"
attr(,"alt_init_codons")
[1] "UUG" "CUG"
> AMINO_ACID_CODE
A R N D C Q E G H I L
"Ala" "Arg" "Asn" "Asp" "Cys" "Gln" "Glu" "Gly" "His" "Ile" "Leu"
K M F P S T W Y V U O
"Lys" "Met" "Phe" "Pro" "Ser" "Thr" "Trp" "Tyr" "Val" "Sec" "Pyl"
B J Z X
"Asx" "Xle" "Glx" "Xaa"
> IUPAC_CODE_MAP
A C G T M R W S Y K
"A" "C" "G" "T" "AC" "AG" "AT" "CG" "CT" "GT"
V H D B N
"ACG" "ACT" "AGT" "CGT" "ACGT"
用于核酸和蛋白序列比对的取代矩阵,这些数据需用户自行载入(最后三个是函数,调用时可以不设参数):data(BLOSUM45/50/62/80/100)、data(PAM30/40/70/120/250)、nucleotideSubstitutionMatrix()、qualitySubstitutionMatrices()、errorSubstitutionMatrices()
示例:(以下对象x均为XString)
> alphabet(BString())
NULL
> alphabet(DNAString())
[1] "A" "C" "G" "T" "M" "R" "W" "S" "Y" "K"
[11] "V" "H" "D" "B" "N" "-" "+" "."
> alphabet(RNAString())
[1] "A" "C" "G" "U" "M" "R" "W" "S" "Y" "K"
[11] "V" "H" "D" "B" "N" "-" "+" "."
> alphabet(AAString())
[1] "A" "R" "N" "D" "C" "Q" "E" "G" "H" "I"
[11] "L" "K" "M" "F" "P" "S" "T" "W" "Y" "V"
[21] "U" "O" "B" "J" "Z" "X" "*" "-" "+" "."
> b <- BString("I am a BString object")
> b
21-letter "BString" instance
seq: I am a BString object
> as.character(b)
[1] "I am a BString object" #注意普通字符串与XString的区别
> length(b) #对XString,length返回字符(字母)数;而对普通字符串向量,length返回的是向量的元素个数
[1] 21
> nchar(b) #尽量使用length(),效率更高
[1] 21
> b[1]
1-letter "BString" instance
seq: I
> b[7:12]
6-letter "BString" instance
seq:  BStri #第7位是空格,故该子序列以空格开头,共6个字符
> b[]
21-letter "BString" instance
seq: I am a BString object
> b[length(b):1]
21-letter "BString" instance
seq: tcejbo gnirtSB a ma I
> DNAString()==RNAString()
[1] TRUE
> DNAString('ACGT')==RNAString('ACGU') #DNA与RNA比较时,T与U视为等价
[1] TRUE
> DNAString('ACGT')==RNAString('GCUU')
[1] FALSE
> DNAString('ACGT')==RNAString('ACGG')
[1] FALSE
> BString('ABCD')=='ABCD'
[1] TRUE
> BString('ACGT')==DNAString('ACGT')
Error in BString("ACGT") == DNAString("ACGT") :
comparison between a "BString" instance and a "DNAString" instance is not supported
XStringViews类通过继承Views类(由IRanges包定义)而来。XStringViews类对象用于存储同一条序列(目标序列)上的一系列“视野”(view),即序列区域或子序列。每一个视野由起始点(start)和终止点(end)确定,因此也隐含了该区域的宽度(width = end - start + 1)信息。
示例:
> d <- DNAString("TTGAAAA-CTC-N")
> dd2 <- subseq(d, start=8)
> v4 <- Views(dd2, start=3:0, end=5:8)
> v4
Views on a 6-letter DNAString subject
subject: -CTC-N
views:
start end width
[1] 3 5 3 [TC-]
[2] 2 6 5 [CTC-N]
[3] 1 7 7 [-CTC-N ]
[4] 0 8 9 [ -CTC-N ] #最后2个视野(第3、4行)超出了目标序列的范围
> length(v4)
[1] 4
> v4[4:2]
Views on a 6-letter DNAString subject
subject: -CTC-N
views:
start end width
[1] 0 8 9 [ -CTC-N ]
[2] 1 7 7 [-CTC-N ]
[3] 2 6 5 [CTC-N]
> v4[2] #索引单个元素仍为XStringViews类
Views on a 6-letter DNAString subject
subject: -CTC-N
views:
start end width
[1] 2 6 5 [CTC-N]
> v4[[2]] #两个方括号,返回XString类
5-letter "DNAString" instance
seq: CTC-N
> v4[[3]] #超出限制,故报错
Error in getListElement(x, i, ...) : view is out of limits
> v4[-3]
Views on a 6-letter DNAString subject
subject: -CTC-N
views:
start end width
[1] 3 5 3 [TC-]
[2] 2 6 5 [CTC-N]
[3] 0 8 9 [ -CTC-N ]
> v4[c(TRUE,FALSE,TRUE,FALSE)]
Views on a 6-letter DNAString subject
subject: -CTC-N
views:
start end width
[1] 3 5 3 [TC-]
[2] 1 7 7 [-CTC-N ]
BStringSet(x=character(), start=NA, end=NA, width=NA, use.names=TRUE)
示例:
> x0 <- c("#CTC-NACCAGTAT", "#TTGA", "TACCTAGAG")
> x0
[1] "#CTC-NACCAGTAT" "#TTGA"
[3] "TACCTAGAG"
> width(x0)
[1] 14 5 9
> x1 <- BStringSet(x0)
> x1
A BStringSet instance of length 3
width seq
[1] 14 #CTC-NACCAGTAT
[2] 5 #TTGA
[3] 9 TACCTAGAG
> BStringSet(x0, start=4, end=-3)
A BStringSet instance of length 3
width seq
[1] 9 C-NACCAGT
[2] 0
[3] 4 CTAG
> subseq(x1, start=4, end=-3)
A BStringSet instance of length 3
width seq
[1] 9 C-NACCAGT
[2] 0
[3] 4 CTAG
> dna0 <- DNAStringSet(x0, start=4, end=-3)
> names(dna0)
NULL
> names(dna0)[2] <- "seqB"
> dna0
A DNAStringSet instance of length 3
width seq names
[1] 9 C-NACCAGT <NA>
[2] 0 seqB
[3] 4 CTAG <NA>
> dna <- DNAString('ATTGCATACCATAGG')
> dna
15-letter "DNAString" instance
seq: ATTGCATACCATAGG
> RNAString(dna)
15-letter "RNAString" instance
seq: AUUGCAUACCAUAGG
> complement(dna)
15-letter "DNAString" instance
seq: TAACGTATGGTATCC
> dna
15-letter "DNAString" instance
seq: ATTGCATACCATAGG
> rev(dna) #反向
15-letter "DNAString" instance
seq: GGATACCATACGTTA
> reverseComplement(dna) #反向互补
15-letter "DNAString" instance
seq: CCTATGGTATGCAAT
> dna
15-letter "DNAString" instance
seq: ATTGCATACCATAGG
> rna <- RNAString(complement(dna))
> rna
15-letter "RNAString" instance
seq: UAACGUAUGGUAUCC
> dna
15-letter "DNAString" instance
seq: ATTGCATACCATAGG
> DNAString(rna)
15-letter "DNAString" instance
seq: TAACGTATGGTATCC
> rna
15-letter "RNAString" instance
seq: UAACGUAUGGUAUCC
> cDNA <- DNAString(complement(rna))
> cDNA
15-letter "DNAString" instance
seq: ATTGCATACCATAGG
注意:转录和逆转录都需要嵌套两层函数调用(先用complement()取互补序列,再转换序列类型):转录为RNAString(complement(dna)),逆转录为DNAString(complement(rna))。
> codons(rna)
Views on a 15-letter RNAString subject
subject: UAACGUAUGGUAUCC
views:
start end width
[1] 1 3 3 [UAA]
[2] 4 6 3 [CGU]
[3] 7 9 3 [AUG]
[4] 10 12 3 [GUA]
[5] 13 15 3 [UCC]
> translate(rna)
5-letter "AAString" instance
seq: *RMVS
> alphabetFrequency(dna)
A C G T M R W S Y K V H D B N - + .
5 3 3 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0
> letterFrequency(dna,letters = 'CG')
C|G
6
> GC <- letterFrequency(dna,'CG')/letterFrequency(dna,'AGCT')
> GC
C|G
0.4
> matchPattern('CA',dna)
Views on a 15-letter DNAString subject
subject: ATTGCATACCATAGG
views:
start end width
[1] 5 6 2 [CA]
[2] 10 11 2 [CA]
部分参考:https://blog.csdn.net/u014801157/article/details/24372449(侵删)