R语言实战分析预测海藻数量

********************预测海藻数量R语言脚本************************

---加载数据包

# Attach the DMwR package and preview the first rows of `algae`
# (presumably the data set shipped with DMwR -- confirm).
library("DMwR")
head(algae, n = 6L)

 

 

---从文件读取数据,并为各数据列指定列名称

# Read the raw data from "Analysis.txt". The file has no header row, so the
# 18 column names are supplied explicitly; the token 'XXXXXXX' is read as NA.
algae <- read.table(
  file = "Analysis.txt",
  header = FALSE,
  dec = ".",
  na.strings = c("XXXXXXX"),
  col.names = c(
    "season", "size", "speed",
    "mxPH", "mno2", "cl", "no3", "nh4", "opo4", "po4", "chla",
    "a1", "a2", "a3", "a4", "a5", "a6", "a7"
  )
)

 

---绘制PH直方图

# Histogram of mxPH drawn on the density scale rather than as raw counts.
hist(algae$mxPH, probability = TRUE)

 

 

---绘制PH直方图加密度图,用QQ图查看数据是否符合正态分布

# Side-by-side diagnostics for mxPH:
#   left  - density-scaled histogram with a kernel density curve and a rug
#           of the (jittered) observations;
#   right - normal QQ plot to judge how close the values are to normal.
library(car)

# par() returns the previous settings, so the layout can be restored below.
old_par <- par(mfrow = c(1, 2))

hist(
  algae$mxPH,
  prob = TRUE,
  xlab = "",
  main = "Histogram of maximum ph value",
  ylim = 0:1
)
lines(density(algae$mxPH, na.rm = TRUE))
rug(jitter(algae$mxPH))

# car::qq.plot() is defunct; qqPlot() is its replacement.
qqPlot(algae$mxPH, main = "Normal QQ Plot of maximum PH")

par(old_par)

 

 

---绘制opo4箱线图

# Box plot of opo4 with a rug of the jittered observations along the
# y axis (side 2) and a dashed horizontal line at the mean.
boxplot(algae$opo4, ylab = "orthophosphate (opo4)")
rug(jitter(algae$opo4), side = 2)
abline(
  h = mean(algae$opo4, na.rm = TRUE),
  lty = "dashed"
)

 

 

---离群值的检测,三条线分别表示均值,均值加标准差,中位数

# Scatter of nh4 against observation index with three reference lines:
#   lty 1 (solid)  - mean
#   lty 2 (dashed) - mean plus one standard deviation
#   lty 3 (dotted) - median
plot(algae$nh4, xlab = "")
abline(
  h = c(
    mean(algae$nh4, na.rm = TRUE),
    mean(algae$nh4, na.rm = TRUE) + sd(algae$nh4, na.rm = TRUE),
    median(algae$nh4, na.rm = TRUE)
  ),
  lty = 1:3
)

# Interactive: click points on the plot to label them with their index.
identify(algae$nh4)

 

---离群值的检测

# Inspect suspected nh4 outliers in two ways.

# 1) Interactively: click points on the plot, then show the clicked rows.
plot(algae$nh4, xlab = "")
clicked.lines <- identify(algae$nh4)

algae[clicked.lines, ]

# 2) By threshold: rows whose nh4 exceeds 19000. The column is `nh4`
#    (there is no `nh4.line` column, which made the filter select nothing).
#    The is.na() guard keeps rows with a missing nh4 from showing up as
#    all-NA rows in the result.
algae[!is.na(algae$nh4) & algae$nh4 > 19000, ]

 

---因子变量绘制lattice箱线图(在规模较小的河流中,a1的频率较高)

# Lattice box-and-whisker plot of a1 for each level of size.
library(lattice)

# Axis label corrected from "Rive Size" to "River Size".
bwplot(size ~ a1, data = algae, ylab = "River Size", xlab = "Algal A1")

 

 

---分位箱线图

# Box-percentile plot of a1 for each level of size, using Hmisc's
# panel.bpplot as the lattice panel function. `probs` lists the quantiles
# passed to the panel (0.01 to 0.49 in steps of 0.01) and
# `datadensity = TRUE` asks it to also mark the individual observations.
library(Hmisc)

bwplot(
  size ~ a1,
  data = algae,
  panel = panel.bpplot,
  probs = seq(0.01, 0.49, by = 0.01),
  datadensity = TRUE,
  # Was misspelled `yalb`, so the y-axis label was never applied.
  ylab = "River Size",
  xlab = "Algal A1"
)

 

 

---两个条件的影响绘图

你可能感兴趣的:(r语言,预测,脚本,数据,library)