类似分位回归的,给定分位数序列 0 < τ 1 < τ 2 < ⋯ < τ K < 1 0<\tau_1<\tau_2<\cdots<\tau_K<1 0<τ1<τ2<⋯<τK<1,复合分位回归的目的不再是在一个分位点上最小化损失函数,而是在多个分位点上同时最小化check function. 则估计回归系数 β \boldsymbol{\beta} β的估计是通过如下目标函数得到的:
( b ^ 1 , … , b ^ K , β ^ C Q R ) = arg min b 1 … , b k , β ∑ k = 1 K { ∑ i = 1 n ρ τ k ( y i − b k − x i ⊤ β ) } (\hat{b}_1,\dots,\hat{b}_K,\hat{\boldsymbol{\beta}}^{\mathrm{CQR}})=\underset{b_1\dots,b_k,\boldsymbol{\beta}}{\argmin}\sum_{k=1}^K\left\{ \sum_{i=1}^n\rho_{\tau_k} (y_i-b_k-\bold{x}_i^{\top}\boldsymbol{\beta})\right\} (b^1,…,b^K,β^CQR)=b1…,bk,βargmink=1∑K{i=1∑nρτk(yi−bk−xi⊤β)}
通常我们会取等距分位序列: τ k = k K + 1 , k = 1 , 2 , … , K \tau_k=\frac{k}{K+1},k=1,2,\dots,K τk=K+1k,k=1,2,…,K. 同时给出估计量的渐近分布: n ( β ^ C Q R − β ∗ ) → N ( 0 , Σ C Q R ) \sqrt{n}(\hat{\boldsymbol{\beta}}^{\mathrm{CQR}}-\boldsymbol{\beta}^*)\to N(0,\boldsymbol{\Sigma}_{\mathrm{CQR}}) n(β^CQR−β∗)→N(0,ΣCQR)其中
Σ C Q R = C − 1 ∑ k , k ′ = 1 K min ( τ k , τ k ′ ) ( 1 − max ( τ k , τ k ′ ) ) ( ∑ k = 1 K f ( b τ k ∗ ) ) 2 \boldsymbol{\Sigma}_{\mathrm{CQR}}=\bold{C}^{-1}\frac{\sum_{k,k'=1}^K\min(\tau_k,\tau_{k'})(1-\max(\tau_k,\tau_k'))}{(\sum_{k=1}^Kf(b^*_{\tau_k}))^2} ΣCQR=C−1(∑k=1Kf(bτk∗))2∑k,k′=1Kmin(τk,τk′)(1−max(τk,τk′))
其中 C \bold{C} C是
lim n → ∞ 1 n X ⊤ X = C \lim_{n\to\infty}\frac{1}{n}\bold{X}^{\top}\bold{X}=\bold{C} n→∞limn1X⊤X=C
data = pd.read_csv(r"C:\Users\beida\Desktop\sc\rent.csv")
n = len(data); p = 1
x = np.array(data[["cons", 'area']])
y = np.array(data["rent_euro"], dtype=np.float64)
#beta = np.matrix([100, 2.6]).reshape(p, 1)
# np.set_printoptions(precision=8)
tau = np.arange(1, 6)/6
k = len(tau)
maxit = 1000
toler = 0.0001
error = 10000
iteration = 1
p = 2
u = np.zeros(k)
r = np.zeros((n, k))
signw = np.zeros((n, k))
z = np.zeros((n, k))
newX = np.zeros((n, k))
#beta = np.matrix([1, 1], dtype=np.float64).reshape(2, 1)
beta = np.linalg.pinv(x.T.dot(x)).dot(x.T).dot(y)
#print(beta, "ols")
while (iteration <= maxit) & (error > toler):
betaold = beta.copy()
#print(betaold, 'betaold')
uv = np.sort(y - x.dot(beta), axis=0) # yes
quantile1 = (n-1)*tau - np.floor((n - 1)*tau) # yes
for i in range(0, k):
u[i] = quantile1[i] * uv[int(np.ceil((n - 1)*tau[i]))] \
+ (1-quantile1[i]) * uv[int(np.floor((n - 1)*tau[i]))]
yh = x.dot(beta)
for i in range(0, k):
r[:, i] = y - u[i] - yh
signw[:, i] = (1 - np.sign(r[:, i]))/2 * (1 - tau[i]) \
+ (np.sign(r[:, i]) + 1) * tau[i]/2
for j in range(0, p):
xbeta = beta[j] * x[:, j]
for i in range(0, k):
z[:, i] = (r[:, i]+xbeta)/x[:, j]
newX[:, i] = x[:, j] * signw[:, i]
vz = z.flatten()
order = vz.argsort()
sortz = vz[order] # yes
vnewX = newX.flatten()
w = np.abs(vnewX[order])
index = np.where(np.cumsum(w) > (np.sum(w)/2))[0][0]
# print(index)
beta[j] = sortz[index]
error = np.sum(np.abs(beta-betaold))
iteration = iteration + 1
print("beta:", beta)
print("tau:", tau)
print("cons:", np.percentile((y-x.dot(beta)), tau*100))
结果
> beta: [135.63724592 4.3381923 ]
> tau: [0.16666667 0.33333333 0.5 0.66666667 0.83333333]
> cons: [-114.9981428 -40.30912765 16.70578506 75.41938784 165.26588296]
cqrmm(x=x,y=y,tau=tau)
set.seed(1)
n=100
p=2
a=rnorm(n*p, mean = 1, sd =1)
x=matrix(a,n,p)
beta=rnorm(p,1,1)
beta=matrix(beta,p,1)
y=x%*%beta-matrix(rnorm(n,0.1,1),n,1)
tau=1:5/6
# x is 1000*10 matrix, y is 1000*1 vector, beta is 10*1 vector
cqr.mm(x,y,tau)
cqrmm = function(x, y, beta, to, m, tau){
if (missing(to)){
toler = 1e-3
}else{toler = to}
if (missing(m)){
maxit = 200
}else{maxit = m}
if (missing(tau)){
cat('no tau_k input','\n')
tau=1:5/6
}else{tau=tau}
x = x
X = x
#arma:: mat x=(xr),r,product,xt,denominator;
#arma:: vec W,uv,v,y=(yr),delta;
#arma:: vec betaold,beta=(betar),quantile,u,yh;
#arma::uvec order, index;
n=nrow(x)
p = ncol(x)
k=length(tau)
error=10000
epsilon=0.9999
iteration=1;
#u.zeros(k);
#r.zeros(n,k);
u <- numeric(k)
r <- matrix(0, n, k)
if (missing(beta)){
beta = solve(t(x)%*%x, t(x)%*%y)
}else{beta = beta}
xt=t(x)
product <- matrix(1, p, n)
while (iteration<=maxit && error>toler)
{
betaold = beta;
yh = x%*% beta; #y_hat
uv = sort(y - yh)
# u is vec of the quantiles of given vector
quantile = (n-1) * tau - floor((n-1) * tau)
for (i in 1:k){
u[i] = quantile[i] * uv[ceiling((n-1) * tau[i])] + (1 - quantile[i]) * uv[floor((n-1) * tau[i])]
}
for (i in 1:k){
r[,i] = y-u[i]-yh;
}
denominator=1/(abs(r)+epsilon)
W = rowSums(denominator)
v <- k - 2 * sum(tau) - rowSums(r * denominator)
for (i in 1:n){
product[,i] = xt[,i]*W[i]
}
delta = solve(product%*%x, xt%*%v);
beta = beta-delta
error = sum(abs(delta))
iteration = iteration + 1
}
b=quantile(y-X%*%beta, tau)
return(list(beta=beta,b=b))
}
library(MASS)
library(cqrReg)
set.seed(NULL)
tau.k = 1:5 / 6
CQREM = function(X, y, tau, betar, weight, maxit, toler) {
if (missing(betar)) {
beta = solve(t(X) %*% X, t(X) %*% y)
cat(beta,'\n')
}
if (missing(tau)) {
tau.k = 1:5 / 6
alpha = tau
}
K = length(tau)
if (missing(weight)) {
weight = rep(1, times = K)
}
if (missing(maxit)) {
maxit = 1000
}
if(missing(toler)){
toler = 1e-5
}
weight = weight
alpha = tau
n = length(y)
tau.k = tau
theta.1 = (1 - 2 * tau.k) / (tau.k * (1 - tau.k))
theta.2 = 2 / (tau.k * (1 - tau.k))
error = 10000
epsilon = 0.9999
iteration = 1
while (iteration <= maxit && error > toler) {
#for (i in 1:maxit){
rbar = matrix(NA, n, K)
mu_new = matrix(NA, n, K)
mu = matrix(NA, n, K)
w.ik = matrix(NA, n, K)
d.ik = matrix(NA, n, K)
r = y - X %*% beta
r
for (k in 1:K) {
rbar[, k] <- sum(r - alpha[k])
}
rbar
delta2 = sqrt((theta.1 ^ 2 + 2 * theta.2)) / abs(rbar)
delta2
delta3 = (theta.2 * weight) / (theta.1 ^ 2 + 2 * theta.2) + abs(rbar) / (sqrt(theta.1 ^ 2 + 2 * theta.2))
delta3
for (k in 1:K) {
w.ik[, k] = delta2[, k] / (theta.2 * weight)[k]
}
w.ik
w.i = rowSums(w.ik)
w.i
W = diag(w.i)
W
#svd_W <- svd(W)
#U <- svd_W$u
#V <- svd_W$v
#D <- svd_W$d
# 构建逆矩阵
#W_inv <- diag(1/D)
#W_reg <- W + lambda * diag(nrow(W))
for (k in 1:K) {
d.ik[, k] = (theta.1 + alpha * delta2)[, k] / (theta.2 * weight)[k]
}
d.ik
d.i = rowSums(d.ik)
d.i
D = as.vector(d.i)
ybar = y - solve(W) %*% D
ybar
beta
beta_new = solve(t(X) %*% W %*% X) %*% t(X) %*% W %*% ybar
beta_new
A = matrix(NA, n, K)
for (k in 1:K){
A[,k] = (y-X%*%beta_new)*delta2[, k]
}
alpha
alpha_new=colSums(A - n*theta.1)/colSums(delta2)
for (k in 1:K) {
mu[, k] = X %*% beta_new + alpha_new[k]
}
weight_new = (2 / (3 * n)) * colSums((y - mu) ^ 2 / (2 * theta.2) * delta2 +
(theta.1 ^ 2 + 2 * theta.2) / (2 * theta.2) * delta3 -
(theta.1 * (y - mu)) / (theta.2))
#alpha_new = colSums(as.numeric((y[1] - t(
# as.matrix(X[1,], row = 1, col = 2)
#)) %*% beta) * delta2)
alpha_new
error = sum(abs(beta_new))
beta = beta_new
alpha = alpha_new
weight = weight_new
iteration = iteration + 1
#cat(error,'\n')
}
b = quantile(y - X %*% beta, tau.k)
return(list(beta = beta, b = b))
}
n = 300
p = 3
X = matrix(rnorm(n*p, 0, 1), n, p)
y = rnorm(n)
CQREM(X, y, tau=tau.k, maxit = 1000, tol=1e-5)
cqr.mm(X, y, tau= tau.k,toler = 1e-5)
>$beta
[,1]
[1,] -0.01504609
[2,] -0.14649880
[3,] 0.23344365
$b
16.66667% 33.33333% 50% 66.66667% 83.33333%
-0.881195701 -0.428558273 -0.000221886 0.470748757 1.102072419
$beta
[,1]
[1,] 0.05258097
[2,] -0.05178250
[3,] 0.13393040
$b
16.66667% 33.33333% 50% 66.66667% 83.33333%
-0.9269618879 -0.3647195276 -0.0001248183 0.4284496629 1.0367478727
EM算法的速度显著慢于MM算法,同时在估计结果上也有略微的差异。
实际上还有一些暴力的求解方法,比如直接凸优化
library(CVXR)
library(cqrReg)
set.seed(1)
n=100
p=2
a=rnorm(n*p, mean = 1, sd =1)
x=matrix(a,n,p)
beta=rnorm(p,1,1)
beta=matrix(beta,p,1)
y=x%*%beta-matrix(rnorm(n,0.1,1),n,1)
tau=1:5/6
cqr.mm(x,y,tau)
# 假设数据已经定义好
Y <- y # 响应变量
X <- x # 解释变量矩阵
taus <- tau # 分位数向量
# 定义决策变量
beta <- Variable(ncol(X))
b <- Variable(length(taus))
# 定义分位数损失函数
quantile_loss <- function(residuals, tau) {
sum(pos(residuals) * tau + pos(-residuals) * (1 - tau))
}
# 构建目标函数的每一部分
objective_parts <- lapply(1:length(taus), function(i) {
quantile_loss(Y - b[i] - X %*% beta, taus[i])
})
# 合并目标函数的各个部分
objective <- Minimize(Reduce(`+`, objective_parts))
# 定义优化问题
problem <- Problem(objective, list())
# 求解
result <- solve(problem)
# 提取结果
result$getValue(beta)
> [,1]
> [1,] 1.470763
> [2,] 2.758932
result$getValue(b)
>
> [,1]
> [1,] -1.1859157
> [2,] -0.7531299
> [3,] -0.2539941
> [4,] 0.1203188
> [5,] 0.7931756
> cqr.mm(x,y,tau)
$beta
[,1]
[1,] 1.508830
[2,] 2.749125
$b
16.66667% 33.33333% 50% 66.66667% 83.33333%
-1.21780307 -0.78448530 -0.29961304 0.05043142 0.69900106
可以看到,与MM算法也存在一些差异
复合分位回归是最小化一系列分位损失函数,但不难看出,CQR是赋予了每个 ρ τ k \rho_{\tau_k} ρτk完全相同的权重,那么就可以考虑不同的权重。这样得到的就是加权的CQR(WCQR)
( α ^ 1 , … , α ^ K , β ^ W C Q R ) = arg min α 1 , … , α K , β ∑ i = 1 N ∑ k = 1 K ω k ⋅ ρ τ k ( Y i − X i T β − α k ) . \left(\hat{\alpha}_{1}, \ldots, \hat{\alpha}_{K}, \hat{\beta}^{\mathrm{WCQR}}\right)=\arg \min _{\alpha_{1}, \ldots, \alpha_{K}, \beta} \sum_{i=1}^{N} \sum_{k=1}^{K} \omega_{k} \cdot \rho_{\tau_{k}}\left(Y_{i}-X_{i}^{T} \beta-\alpha_{k}\right) . (α^1,…,α^K,β^WCQR)=argα1,…,αK,βmini=1∑Nk=1∑Kωk⋅ρτk(Yi−XiTβ−αk).
求解原理与CQR类似的,加入设定相同的权重w,那么就应该得到与CQR完全相同的结果
Y <- y # 响应变量
X <- x # 解释变量矩阵
taus <- tau # 分位数向量
w = c(0.2,0.2,0.2,0.2,0.2)
# 定义决策变量
beta <- Variable(ncol(X))
b <- Variable(length(taus))
# 定义分位数损失函数
quantile_loss <- function(residuals, tau) {
sum(pos(residuals) * tau + pos(-residuals) * (1 - tau))
}
# 构建目标函数的每一部分
objective_parts <- lapply(1:length(taus), function(i) {
w[i]*quantile_loss(Y - b[i] - X %*% beta, taus[i])
})
# 合并目标函数的各个部分
objective <- Minimize(Reduce(`+`, objective_parts))
# 定义优化问题
problem <- Problem(objective, list())
# 求解
result <- solve(problem)
# 提取结果
beta_val <- result$getValue(beta)
b_val <- result$getValue(b)
beta_val
b_val
> beta_val
[,1]
[1,] 1.470763
[2,] 2.758932
> b_val
[,1]
[1,] -1.1859157
[2,] -0.7531299
[3,] -0.2539941
[4,] 0.1203188
[5,] 0.7931756
现在设定不同的权重w:
# 生成n个随机数
n <- length(taus)
random_numbers <- runif(n)
# 归一化使得它们的总和为1
normalized_numbers <- random_numbers / sum(random_numbers)
# 验证它们的总和是否为1
sum(normalized_numbers)
Y <- y # 响应变量
X <- x # 解释变量矩阵
taus <- tau # 分位数向量
w = normalized_numbers
# 定义决策变量
beta <- Variable(ncol(X))
b <- Variable(length(taus))
# 定义分位数损失函数
quantile_loss <- function(residuals, tau) {
sum(pos(residuals) * tau + pos(-residuals) * (1 - tau))
}
# 构建目标函数的每一部分
objective_parts <- lapply(1:length(taus), function(i) {
w[i]*quantile_loss(Y - b[i] - X %*% beta, taus[i])
})
# 合并目标函数的各个部分
objective <- Minimize(Reduce(`+`, objective_parts))
# 定义优化问题
problem <- Problem(objective, list())
# 求解
result <- solve(problem)
# 提取结果
beta_val <- result$getValue(beta)
b_val <- result$getValue(b)
> beta_val
[,1]
[1,] 1.525781
[2,] 2.736585
> b_val
[,1]
[1,] -1.23440023
[2,] -0.79443834
[3,] -0.32373464
[4,] 0.05565281
[5,] 0.79790504
可见估计值有了差异。