##导入数据
setwd("C:/Users/chang/Documents/SRM-PA/R简介/上课练习数据集")
healthexpend <- read.csv(file="HealthExpend.csv",header=T)
##取其中EXPENDOP>0的数据,记为EXPENDOP
attach(healthexpend)
EXPENDOP<- healthexpend$EXPENDOP[EXPENDOP>0]
EXPENDOP
summary(EXPENDOP)
##做频数直方图
hist(EXPENDOP,freq=T,breaks=10,xlim=c(0,70000))
n <- length(EXPENDOP)
m <- mean(EXPENDOP)
v <- var(EXPENDOP)*(n-1)/n # 注意!
sigma1 <- (log(1+(v/(m^2))))^0.5
m1 <- log(m)-1/2*(sigma1^2)
m1;sigma1
## 矩估计(牛顿法)
Newtons <- function(fun,x,ep=1e-2,it_max=100000){
index <- 0;k <- 1
while(k<=it_max){
x1 <- x;obj <- fun(x)
x <- x-solve(obj$J,obj$f)
times <- sqrt((x-x1)%*%(x-x1))
if(times < ep){
index <- 1;break
}
k <- k+1
}
obj <- fun(x)
list(root=x,it=k,index=index,FunVal=obj$f)
}
fun <- function(p){
n <- length(EXPENDOP)
A1 <- mean(EXPENDOP);M1 <- (n-1)/n*var(EXPENDOP)
f <- c(exp(p[1]+p[2]/2)-A1,exp(2*p[1]+p[2])*(exp(p[2])-1)-M1)
J <- matrix(c(exp(p[1]+p[2]/2),exp(p[1]+p[2]/2)/2,2*(exp(p[2])-1)*exp(2*p[1]+p[2]),
2*exp(2*p[1]+2*p[2])-exp(2*p[1]+p[2])),ncol=2,nrow=2,byrow = T)
list(f=f,J=J)
}
newton <- Newtons(fun,c(20,5))
m1a <- newton$root[1]
sigma1a <- (newton$root[2])^0.5
m1a;sigma1a
fun2 <- function(params){
data <- EXPENDOP
f <- sum(dlnorm(data,params[1],params[2],log= TRUE))
return(-f)
}
nlm1 <- nlm(fun2,c(6,1))
nlm2 <- nlminb(c(6,1),fun2,lower=c(0,0),upper=c(Inf,Inf))
m2 <- nlm2$par[1]
sigma2 <- nlm2$par[2]
m2;sigma2
直方图及对应的拟合的密度曲线
hist(EXPENDOP,freq=F,breaks=10,xlim=c(0,70000),main="直方图及拟合的密度曲线")
curve(dlnorm(x,m1,sigma1),lty=1,col="blue",add=TRUE,lwd=2)
curve(dlnorm(x,m2,sigma2),lty=2,col="red",add=TRUE,lwd=2)
legend("topright",cex=0.6,c("矩估计","极大似然估 计"),col=c("blue","red"),lty=c(1,2),lwd=c(2,2))
经验分布函数及拟合的分布函数
plot(ecdf(EXPENDOP),verticals=TRUE,do.p=FALSE,xlab="EXPENDOP",main="经验分布函数图像",ylab="分布函数",xlim=c(0,70000))
curve(plnorm(x,m1,sigma1),lty=1,col="blue",add=TRUE,lwd=2)
curve(plnorm(x,m2,sigma2),lty=2,col="red",add=TRUE,lwd=2)
legend("bottomright",inset=0.04,cex=0.6,c("矩估计","极大似然估计"),col=c("blue","red"),lty=c(1,2),lwd=c(2,2))
分别用泊松和负二项分布拟合所给保单赔款次数分布,参数估计方法采用MLE,然后计算泊松分布与负二项分布拟合的保单数,并将两种分布拟合的保单数放在原数据的后两列,将上述表格以CSV表格形式输出。
data1 <- read.csv(choose.files(), header=T)##nlminb练习.csv
fix(data)# 直接手动改数据
n <- as.numeric(data1[1:7,1])#索赔次数,转变成数值型
m <- data1[1:7,2] #保单数
x <- c()##经验数据
for (i in 0:6){
x <- append(x,rep(i,m[i+1]))
}
sumlnb <- function(data,parm){
sumL <- sum(log(dnbinom(data,size=parm[1],prob=parm[2])))
return(-sumL)
}#似然函数
parm0 <- c(3,0.8)#初始参数
best_MLEnb <- nlminb(start=parm0,objective=sumlnb,data=x)##最大化似然函数
best_parmnb <- best_MLEnb$par
best_parmnb#参数估计结果
MLEnb <- round(dnbinom(n,size=best_parmnb[1],prob=best_parmnb[2])*100000,0)
sumlpoi<- function(data,parm){
sumL <- sum(log(dpois(data,lambda=parm)))
return(-sumL)
}
parm1 <- 3
best_MLEpoi <- nlminb(start=parm1,objective=sumlpoi,data=x)
best_parmpoi <- best_MLEpoi$par
best_parmpoi
MLEpoi <- round((dpois(n,lambda=best_parmpoi))*100000,0)
结果输出
nlminb_result <- data.frame("赔款次数"=n, "保单数"=m, "负二项分布拟合结果"=MLEnb, "泊松分布拟合结果"=MLEpoi)
nlminb_result
write.csv(nlminb_result, "nlminb_result.csv")