复合分位回归的求解

发布时间:2023年12月22日

复合分位回归

类似分位回归的,给定分位数序列 0 < τ 1 < τ 2 < ? < τ K < 1 0<\tau_1<\tau_2<\cdots<\tau_K<1 0<τ1?<τ2?<?<τK?<1,复合分位回归的目的不再是在一个分位点上最小化损失函数,而是在多个分位点上同时最小化check function. 则估计回归系数 β \boldsymbol{\beta} β的估计是通过如下目标函数得到的:
( b ^ 1 , … , b ^ K , β ^ C Q R ) = arg?min ? b 1 … , b k , β ∑ k = 1 K { ∑ i = 1 n ρ τ k ( y i ? b k ? x i ? β ) } (\hat{b}_1,\dots,\hat{b}_K,\hat{\boldsymbol{\beta}}^{\mathrm{CQR}})=\underset{b_1\dots,b_k,\boldsymbol{\beta}}{\argmin}\sum_{k=1}^K\left\{ \sum_{i=1}^n\rho_{\tau_k} (y_i-b_k-\bold{x}_i^{\top}\boldsymbol{\beta})\right\} (b^1?,,b^K?,β^?CQR)=b1?,bk?,βargmin?k=1K?{i=1n?ρτk??(yi??bk??xi??β)}
通常我们会取等距分位序列: τ k = k K + 1 , k = 1 , 2 , … , K \tau_k=\frac{k}{K+1},k=1,2,\dots,K τk?=K+1k?,k=1,2,,K. 同时给出估计量的渐近分布: n ( β ^ C Q R ? β ? ) → N ( 0 , Σ C Q R ) \sqrt{n}(\hat{\boldsymbol{\beta}}^{\mathrm{CQR}}-\boldsymbol{\beta}^*)\to N(0,\boldsymbol{\Sigma}_{\mathrm{CQR}}) n ?(β^?CQR?β?)N(0,ΣCQR?)其中
Σ C Q R = C ? 1 ∑ k , k ′ = 1 K min ? ( τ k , τ k ′ ) ( 1 ? max ? ( τ k , τ k ′ ) ) ( ∑ k = 1 K f ( b τ k ? ) ) 2 \boldsymbol{\Sigma}_{\mathrm{CQR}}=\bold{C}^{-1}\frac{\sum_{k,k'=1}^K\min(\tau_k,\tau_{k'})(1-\max(\tau_k,\tau_k'))}{(\sum_{k=1}^Kf(b^*_{\tau_k}))^2} ΣCQR?=C?1(k=1K?f(bτk???))2k,k=1K?min(τk?,τk?)(1?max(τk?,τk?))?
其中 C \bold{C} C
lim ? n → ∞ 1 n X ? X = C \lim_{n\to\infty}\frac{1}{n}\bold{X}^{\top}\bold{X}=\bold{C} nlim?n1?X?X=C

1.1 基于MM算法的Python实现

data = pd.read_csv(r"C:\Users\beida\Desktop\sc\rent.csv")
n = len(data); p = 1
x = np.array(data[["cons", 'area']])

y = np.array(data["rent_euro"], dtype=np.float64)
#beta = np.matrix([100, 2.6]).reshape(p, 1)
# np.set_printoptions(precision=8)
tau = np.arange(1, 6)/6
k = len(tau)
maxit = 1000
toler = 0.0001
error = 10000
iteration = 1
p = 2
u = np.zeros(k)
r = np.zeros((n, k))
signw = np.zeros((n, k))
z = np.zeros((n, k))
newX = np.zeros((n, k))
#beta = np.matrix([1, 1], dtype=np.float64).reshape(2, 1)
beta = np.linalg.pinv(x.T.dot(x)).dot(x.T).dot(y)
#print(beta, "ols")

while (iteration <= maxit) & (error > toler):
    betaold = beta.copy()
    #print(betaold, 'betaold')
    uv = np.sort(y - x.dot(beta), axis=0)  # yes
    quantile1 = (n-1)*tau - np.floor((n - 1)*tau)  # yes
    for i in range(0, k):
        u[i] = quantile1[i] * uv[int(np.ceil((n - 1)*tau[i]))] \
            + (1-quantile1[i]) * uv[int(np.floor((n - 1)*tau[i]))]
    yh = x.dot(beta)

    for i in range(0, k):
        r[:, i] = y - u[i] - yh
        signw[:, i] = (1 - np.sign(r[:, i]))/2 * (1 - tau[i])  \
            + (np.sign(r[:, i]) + 1) * tau[i]/2
    for j in range(0, p):
        xbeta = beta[j] * x[:, j]
        for i in range(0, k):
            z[:, i] = (r[:, i]+xbeta)/x[:, j]
            newX[:, i] = x[:, j] * signw[:, i]

        vz = z.flatten()
        order = vz.argsort()
        sortz = vz[order]  # yes
        vnewX = newX.flatten()
        w = np.abs(vnewX[order])
        index = np.where(np.cumsum(w) > (np.sum(w)/2))[0][0]
        # print(index)
        beta[j] = sortz[index]
    error = np.sum(np.abs(beta-betaold))
    iteration = iteration + 1
print("beta:", beta)
print("tau:", tau)
print("cons:", np.percentile((y-x.dot(beta)), tau*100))

结果

> beta: [135.63724592   4.3381923 ]
> tau: [0.16666667 0.33333333 0.5        0.66666667 0.83333333]
> cons: [-114.9981428   -40.30912765   16.70578506   75.41938784  165.26588296]

1.2 基于MM算法的CQR及其R实现

cqrmm(x=x,y=y,tau=tau)
set.seed(1)
n=100
p=2
a=rnorm(n*p, mean = 1, sd =1)
x=matrix(a,n,p)
beta=rnorm(p,1,1)
beta=matrix(beta,p,1)
y=x%*%beta-matrix(rnorm(n,0.1,1),n,1)
tau=1:5/6
# x is 1000*10 matrix, y is 1000*1 vector, beta is 10*1 vector
cqr.mm(x,y,tau)


cqrmm = function(x, y, beta, to, m, tau){
  if (missing(to)){
    toler = 1e-3
  }else{toler = to}
  if (missing(m)){
    maxit = 200
  }else{maxit = m}
  if (missing(tau)){
    cat('no tau_k input','\n')
    tau=1:5/6
  }else{tau=tau}
  x = x
  X = x
  #arma:: mat x=(xr),r,product,xt,denominator;
  #arma:: vec W,uv,v,y=(yr),delta;
  #arma:: vec betaold,beta=(betar),quantile,u,yh;
  #arma::uvec order, index;
  n=nrow(x)
  p = ncol(x)
  k=length(tau)
  error=10000
  epsilon=0.9999

  iteration=1;
  #u.zeros(k);
  #r.zeros(n,k);
  u <- numeric(k)
  r <- matrix(0, n, k)
  if (missing(beta)){
    beta = solve(t(x)%*%x, t(x)%*%y)
    }else{beta = beta}
  xt=t(x)
  product <- matrix(1, p, n)
  
  
  while (iteration<=maxit && error>toler)
  {
    betaold = beta;
    yh = x%*% beta; #y_hat
    uv = sort(y - yh)
    
    # u is vec of the quantiles of given vector
    quantile = (n-1) * tau - floor((n-1) * tau)
    
    
    for (i in 1:k){	
      u[i] = quantile[i] * uv[ceiling((n-1) * tau[i])] + (1 - quantile[i]) * uv[floor((n-1) * tau[i])]
    }
    
    for (i in 1:k){
      r[,i] = y-u[i]-yh;
    }
    denominator=1/(abs(r)+epsilon)
    W = rowSums(denominator)
    v <- k - 2 * sum(tau) - rowSums(r * denominator)
    for (i in 1:n){
      product[,i] = xt[,i]*W[i]
      }
    
    delta = solve(product%*%x, xt%*%v);
    
    beta = beta-delta
    error = sum(abs(delta))
    iteration = iteration + 1
  }
  b=quantile(y-X%*%beta, tau)
  
  return(list(beta=beta,b=b))
}

1.3 基于EM算法的R语言实现

library(MASS)
library(cqrReg)
set.seed(NULL)
tau.k = 1:5 / 6

CQREM = function(X, y, tau, betar, weight, maxit, toler) {
  if (missing(betar)) {
    beta = solve(t(X) %*% X, t(X) %*% y)
    cat(beta,'\n')
  }
  if (missing(tau)) {
    tau.k = 1:5 / 6
    alpha = tau
  }
  K = length(tau)
  if (missing(weight)) {
    weight = rep(1, times = K)
  }
  if (missing(maxit)) {
    maxit = 1000
  }
  if(missing(toler)){
    toler = 1e-5
  }

  weight = weight
  alpha = tau
  n = length(y)
  tau.k = tau
  theta.1 = (1 - 2 * tau.k) / (tau.k * (1 - tau.k))
  theta.2 = 2 / (tau.k * (1 - tau.k))
  error = 10000
  epsilon = 0.9999
  iteration = 1
  while (iteration <= maxit && error > toler) {
  #for (i in 1:maxit){ 
   rbar = matrix(NA, n, K)
    mu_new = matrix(NA, n, K)
    mu = matrix(NA, n, K)
    w.ik = matrix(NA, n, K)
    d.ik = matrix(NA, n, K)
    r = y - X %*% beta
    r
    for (k in 1:K) {
      rbar[, k] <- sum(r - alpha[k])
    }
    rbar
    delta2 = sqrt((theta.1 ^ 2 + 2 * theta.2)) / abs(rbar)
    delta2
    delta3 = (theta.2 * weight) / (theta.1 ^ 2 + 2 * theta.2) + abs(rbar) / (sqrt(theta.1 ^ 2 + 2 * theta.2))
    delta3
    for (k in 1:K) {
      w.ik[, k] = delta2[, k] / (theta.2 * weight)[k]
    }
    w.ik
    w.i = rowSums(w.ik)
    w.i
    W = diag(w.i)
    W
    #svd_W <- svd(W)
    #U <- svd_W$u
    #V <- svd_W$v
    #D <- svd_W$d
    # 构建逆矩阵
    #W_inv <- diag(1/D)
    #W_reg <- W + lambda * diag(nrow(W))
    for (k in 1:K) {
      d.ik[, k] = (theta.1 + alpha * delta2)[, k] / (theta.2 * weight)[k]
    }
    d.ik
    d.i = rowSums(d.ik)
    d.i
    D = as.vector(d.i)
    ybar = y - solve(W) %*% D
    ybar
    beta
    beta_new = solve(t(X) %*% W %*% X) %*% t(X) %*% W %*% ybar
    beta_new
    A = matrix(NA, n, K)
    for (k in 1:K){
      A[,k] = (y-X%*%beta_new)*delta2[, k]
    }
    alpha
    alpha_new=colSums(A - n*theta.1)/colSums(delta2)
    for (k in 1:K) {
      mu[, k] = X %*% beta_new + alpha_new[k]
    }
    weight_new = (2 / (3 * n)) * colSums((y - mu) ^ 2 / (2 * theta.2) * delta2 +
                                           (theta.1 ^ 2 + 2 * theta.2) / (2 * theta.2) * delta3 -
                                           (theta.1 * (y - mu)) / (theta.2))
    

    #alpha_new = colSums(as.numeric((y[1] - t(
    #  as.matrix(X[1,], row = 1, col = 2)
    #)) %*% beta) * delta2)
    alpha_new

    error = sum(abs(beta_new))
    beta = beta_new
    alpha = alpha_new
    weight = weight_new
    
    iteration = iteration + 1
    #cat(error,'\n')
  }
  b = quantile(y - X %*% beta, tau.k)
  return(list(beta = beta, b = b))
}

n = 300
p = 3
X = matrix(rnorm(n*p, 0, 1), n, p)
y = rnorm(n)
CQREM(X, y, tau=tau.k, maxit = 1000, tol=1e-5)
cqr.mm(X, y, tau= tau.k,toler = 1e-5)

>$beta
            [,1]
[1,] -0.01504609
[2,] -0.14649880
[3,]  0.23344365

$b
   16.66667%    33.33333%          50%    66.66667%    83.33333% 
-0.881195701 -0.428558273 -0.000221886  0.470748757  1.102072419

$beta
            [,1]
[1,]  0.05258097
[2,] -0.05178250
[3,]  0.13393040

$b
    16.66667%     33.33333%           50%     66.66667%     83.33333% 
-0.9269618879 -0.3647195276 -0.0001248183  0.4284496629  1.0367478727 

EM算法的速度显著慢于MM算法,同时在估计结果上也有略微的差异。

1.4 基于凸优化(CVX)的复合分位回归

实际上还有一些暴力的求解方法,比如直接凸优化

library(CVXR)
library(cqrReg)
set.seed(1)
n=100
p=2
a=rnorm(n*p, mean = 1, sd =1)
x=matrix(a,n,p)
beta=rnorm(p,1,1)
beta=matrix(beta,p,1)
y=x%*%beta-matrix(rnorm(n,0.1,1),n,1)
tau=1:5/6
cqr.mm(x,y,tau)
# 假设数据已经定义好
Y <- y # 响应变量
X <- x # 解释变量矩阵
taus <- tau # 分位数向量

# 定义决策变量
beta <- Variable(ncol(X))
b <- Variable(length(taus))

# 定义分位数损失函数
quantile_loss <- function(residuals, tau) {
  sum(pos(residuals) * tau + pos(-residuals) * (1 - tau))
}


# 构建目标函数的每一部分
objective_parts <- lapply(1:length(taus), function(i) {
  quantile_loss(Y - b[i] - X %*% beta, taus[i])
})

# 合并目标函数的各个部分
objective <- Minimize(Reduce(`+`, objective_parts))

# 定义优化问题
problem <- Problem(objective, list())

# 求解
result <- solve(problem)

# 提取结果
result$getValue(beta)
>          [,1]
> [1,] 1.470763
> [2,] 2.758932

result$getValue(b)
>
>            [,1]
> [1,] -1.1859157
> [2,] -0.7531299
> [3,] -0.2539941
> [4,]  0.1203188
> [5,]  0.7931756

> cqr.mm(x,y,tau)
$beta
         [,1]
[1,] 1.508830
[2,] 2.749125

$b
  16.66667%   33.33333%         50%   66.66667%   83.33333% 
-1.21780307 -0.78448530 -0.29961304  0.05043142  0.69900106 

可以看到,与MM算法也存在一些差异

进一步思考

加权复合分位回归

复合分位回归是最小化一系列分位损失函数,但不难看出,CQR是赋予了每个 ρ τ k \rho_{\tau_k} ρτk??完全相同的权重,那么就可以考虑不同的权重。这样得到的就是加权的CQR(WCQR)
( α ^ 1 , … , α ^ K , β ^ W C Q R ) = arg ? min ? α 1 , … , α K , β ∑ i = 1 N ∑ k = 1 K ω k ? ρ τ k ( Y i ? X i T β ? α k ) . \left(\hat{\alpha}_{1}, \ldots, \hat{\alpha}_{K}, \hat{\beta}^{\mathrm{WCQR}}\right)=\arg \min _{\alpha_{1}, \ldots, \alpha_{K}, \beta} \sum_{i=1}^{N} \sum_{k=1}^{K} \omega_{k} \cdot \rho_{\tau_{k}}\left(Y_{i}-X_{i}^{T} \beta-\alpha_{k}\right) . (α^1?,,α^K?,β^?WCQR)=argα1?,,αK?,βmin?i=1N?k=1K?ωk??ρτk??(Yi??XiT?β?αk?).
求解原理与CQR类似的,加入设定相同的权重w,那么就应该得到与CQR完全相同的结果

Y <- y # 响应变量
X <- x # 解释变量矩阵
taus <- tau # 分位数向量
w = c(0.2,0.2,0.2,0.2,0.2)
# 定义决策变量
beta <- Variable(ncol(X))
b <- Variable(length(taus))

# 定义分位数损失函数
quantile_loss <- function(residuals, tau) {
  sum(pos(residuals) * tau + pos(-residuals) * (1 - tau))
}


# 构建目标函数的每一部分
objective_parts <- lapply(1:length(taus), function(i) {
  w[i]*quantile_loss(Y - b[i] - X %*% beta, taus[i])
})

# 合并目标函数的各个部分
objective <- Minimize(Reduce(`+`, objective_parts))

# 定义优化问题
problem <- Problem(objective, list())

# 求解
result <- solve(problem)

# 提取结果
beta_val <- result$getValue(beta)
b_val <- result$getValue(b)


beta_val
b_val


> beta_val
         [,1]
[1,] 1.470763
[2,] 2.758932
> b_val
           [,1]
[1,] -1.1859157
[2,] -0.7531299
[3,] -0.2539941
[4,]  0.1203188
[5,]  0.7931756

现在设定不同的权重w:


# 生成n个随机数
n <- length(taus)
random_numbers <- runif(n)

# 归一化使得它们的总和为1
normalized_numbers <- random_numbers / sum(random_numbers)

# 验证它们的总和是否为1
sum(normalized_numbers)


Y <- y # 响应变量
X <- x # 解释变量矩阵
taus <- tau # 分位数向量
w = normalized_numbers
# 定义决策变量
beta <- Variable(ncol(X))
b <- Variable(length(taus))

# 定义分位数损失函数
quantile_loss <- function(residuals, tau) {
  sum(pos(residuals) * tau + pos(-residuals) * (1 - tau))
}


# 构建目标函数的每一部分
objective_parts <- lapply(1:length(taus), function(i) {
  w[i]*quantile_loss(Y - b[i] - X %*% beta, taus[i])
})

# 合并目标函数的各个部分
objective <- Minimize(Reduce(`+`, objective_parts))

# 定义优化问题
problem <- Problem(objective, list())

# 求解
result <- solve(problem)

# 提取结果
beta_val <- result$getValue(beta)
b_val <- result$getValue(b)

> beta_val
         [,1]
[1,] 1.525781
[2,] 2.736585
> b_val
            [,1]
[1,] -1.23440023
[2,] -0.79443834
[3,] -0.32373464
[4,]  0.05565281
[5,]  0.79790504

可见估计值有了差异。

文章来源:https://blog.csdn.net/qq_44638724/article/details/135146661
本文来自互联网用户投稿,该文观点仅代表作者本人,不代表本站立场。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。