sklearn source code walkthrough: basic linear models, ridge regression, ridge.py (1)

Classes and details already covered earlier are not repeated here. Some familiarity with sparse matrices is assumed.
from abc import ABCMeta, abstractmethod
import warnings

import numpy as np
from scipy import linalg
from scipy import sparse
from scipy.sparse import linalg as sp_linalg

from .base import LinearClassifierMixin, LinearModel, _rescale_data
from .sag import sag_solver: a stochastic average gradient (SAG) solver for ridge and logistic regression; a gradient-descent variant that converges quickly.
from ..base import RegressorMixin
from ..utils.extmath import safe_sparse_dot
from ..utils.extmath import row_norms: row-wise Euclidean norms (works on sparse matrices as well).
from ..utils import check_X_y
from ..utils import check_array: converts the input to an ndarray.
from ..utils import check_consistent_length: checks that every array in a list has the same first dimension (i.e. same length).
from ..utils import column_or_1d: a specialized ravel; accepts a feature-like ndarray whose second dimension has size 1 and flattens it into a 1-D array.
from ..preprocessing import LabelBinarizer: binarizes labels for one-vs-all problems.
from ..model_selection import GridSearchCV
from ..externals import six
from ..metrics.scorer import check_scoring: for a model that can produce score estimates, returns the function that computes the score.
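
As a quick illustration, here is a minimal sketch (my own example, not part of ridge.py) of what the validation helpers above do, on made-up data:

import numpy as np
from sklearn.utils import check_array, check_consistent_length, column_or_1d

X_list = [[1, 2], [3, 4]]
X = check_array(X_list)            # list of lists -> 2-D ndarray of shape (2, 2)

y_col = np.array([[1.0], [2.0]])
y = column_or_1d(y_col)            # (2, 1) column -> 1-D array of shape (2,)

check_consistent_length(X, y)      # passes: both have first dimension 2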


Before the concrete classes, the module-level interfaces are described first. (In no particular order.)
sp_linalg.aslinearoperator: wraps an object (ndarray, sparse matrix, matrix, and so on) as a LinearOperator.
np.empty(shape): returns an uninitialized ndarray of the given shape.
sp_linalg.cg: solves a linear system with the Conjugate Gradient method.
sp_linalg.lsqr: an iterative solver for (damped) sparse least-squares problems.
ndarray.flat: a 1-D iterator over the array; supports strided indexing into the flattened view.
np.atleast_1d: promotes scalars to 1-D arrays; higher-dimensional inputs pass through unchanged.
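
The two scipy helpers that do the heavy lifting below can be shown in a minimal sketch (my own example, not from ridge.py):

import numpy as np
from scipy.sparse import linalg as sp_linalg

rng = np.random.RandomState(0)
A = rng.randn(4, 4)
A = A @ A.T + np.eye(4)                 # symmetric positive definite
b = rng.randn(4)

A_op = sp_linalg.aslinearoperator(A)    # exposes matvec / rmatvec
x, info = sp_linalg.cg(A_op, b, tol=1e-10)
assert info == 0                        # 0 means cg converged
assert np.allclose(A @ x, b)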

def _solve_sparse_cg(X, y, alpha, max_iter=None, tol=1e-3, verbose=0):
    n_samples, n_features = X.shape
    X1 = sp_linalg.aslinearoperator(X)
    coefs = np.empty((y.shape[1], n_features))

    if n_features > n_samples:
        def create_mv(curr_alpha):
            def _mv(x):
                return X1.matvec(X1.rmatvec(x)) + curr_alpha * x
            return _mv
    else:
        def create_mv(curr_alpha):
            def _mv(x):
                return X1.rmatvec(X1.matvec(x)) + curr_alpha * x
            return _mv

    for i in range(y.shape[1]):
        y_column = y[:, i]

        mv = create_mv(alpha[i])
        if n_features > n_samples:
            # kernel ridge: w = X.T * inv(X X^T + alpha * Id) y
            C = sp_linalg.LinearOperator(
                (n_samples, n_samples), matvec=mv, dtype=X.dtype)
            coef, info = sp_linalg.cg(C, y_column, tol=tol)
            coefs[i] = X1.rmatvec(coef)
        else:
            # linear ridge: w = inv(X^T X + alpha * Id) * X.T y
            y_column = X1.rmatvec(y_column)
            C = sp_linalg.LinearOperator(
                (n_features, n_features), matvec=mv, dtype=X.dtype)
            coefs[i], info = sp_linalg.cg(C, y_column, maxiter=max_iter,
                                          tol=tol)

        if info < 0:
            raise ValueError("Failed with error code %d" % info)

        if max_iter is None and info > 0 and verbose:
            warnings.warn("sparse_cg did not converge after %d iterations."
                          % info)
    return coefs



_solve_sparse_cg:
 Here the matrix inversion that appears in the closed-form solution is recast as solving a linear system.
 The linear operator merely defines transformations by the matrix: matvec is the matrix-vector product, rmatvec the product with the conjugate transpose.
 The function branches on whether n_features exceeds n_samples; once the inverses are taken, the two branches can be shown to yield the same result (see Kernel Ridge Regression.pdf).
 Returns the coefficients.
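
The equivalence of the two branches is the standard primal/dual ridge identity; a minimal numpy check (my own, with made-up data):

import numpy as np

rng = np.random.RandomState(0)
X = rng.randn(5, 8)                     # n_samples < n_features
y = rng.randn(5)
alpha = 0.7

# primal: w = inv(X^T X + alpha * I_p) X^T y
w_primal = np.linalg.solve(X.T @ X + alpha * np.eye(8), X.T @ y)
# dual (kernel): w = X^T inv(X X^T + alpha * I_n) y
w_dual = X.T @ np.linalg.solve(X @ X.T + alpha * np.eye(5), y)

assert np.allclose(w_primal, w_dual)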

def _solve_lsqr(X, y, alpha, max_iter=None, tol=1e-3):
    n_samples, n_features = X.shape
    coefs = np.empty((y.shape[1], n_features))
    n_iter = np.empty(y.shape[1], dtype=np.int32)

    # lsqr minimizes ||X w - y||^2 + damp^2 ||w||^2, so damp = sqrt(alpha)
    sqrt_alpha = np.sqrt(alpha)

    for i in range(y.shape[1]):
        y_column = y[:, i]
        info = sp_linalg.lsqr(X, y_column, damp=sqrt_alpha[i],
                              atol=tol, btol=tol, iter_lim=max_iter)
        coefs[i] = info[0]
        n_iter[i] = info[2]

    return coefs, n_iter



_solve_lsqr:
 Returns the regularized least-squares solution of the linear system; lsqr's damp parameter contributes the ridge penalty, which is why damp is set to sqrt(alpha).
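
A minimal sketch (my own, not from ridge.py) showing that lsqr with damp = sqrt(alpha) reproduces the ridge solution from the normal equations:

import numpy as np
from scipy.sparse import linalg as sp_linalg

rng = np.random.RandomState(0)
X = rng.randn(20, 5)
y = rng.randn(20)
alpha = 1.0

# lsqr minimizes ||X w - y||^2 + damp^2 ||w||^2
w = sp_linalg.lsqr(X, y, damp=np.sqrt(alpha))[0]

w_ref = np.linalg.solve(X.T @ X + alpha * np.eye(5), X.T @ y)
assert np.allclose(w, w_ref, atol=1e-4)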

def _solve_cholesky(X, y, alpha):
    # w = inv(X^T X + alpha * Id) * X.T y
    n_samples, n_features = X.shape
    n_targets = y.shape[1]

    A = safe_sparse_dot(X.T, X, dense_output=True)
    Xy = safe_sparse_dot(X.T, y, dense_output=True)

    one_alpha = np.array_equal(alpha, len(alpha) * [alpha[0]])

    if one_alpha:
        # one shared penalty: add it to the diagonal of A in place
        A.flat[::n_features + 1] += alpha[0]
        return linalg.solve(A, Xy, sym_pos=True, overwrite_a=True).T
    else:
        # one penalty per target: solve target by target
        coefs = np.empty([n_targets, n_features])
        for coef, target, curr_alpha in zip(coefs, Xy.T, alpha):
            A.flat[::n_features + 1] += curr_alpha
            coef[:] = linalg.solve(A, target, sym_pos=True,
                                   overwrite_a=False).ravel()
            A.flat[::n_features + 1] -= curr_alpha
        return coefs



_solve_cholesky:
 Since sym_pos=True is passed to linalg.solve, the matrix is declared symmetric positive definite, so the system is solved via a Cholesky decomposition (a factorization into a triangular matrix times its transpose).
 A.flat[::n_features + 1] is shorthand for the diagonal elements of A.
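
The diagonal trick works because, in a C-ordered n x n array, consecutive diagonal entries are n + 1 positions apart in the flattened view; a tiny check:

import numpy as np

n = 3
A = np.zeros((n, n))
A.flat[::n + 1] += 5.0       # touches exactly the diagonal, in place
assert np.allclose(A, 5.0 * np.eye(n))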

def _solve_cholesky_kernel(K, y, alpha, sample_weight=None, copy=False):
    # dual_coef = inv(K + alpha * Id) y
    n_samples = K.shape[0]
    n_targets = y.shape[1]

    if copy:
        K = K.copy()

    alpha = np.atleast_1d(alpha)
    one_alpha = (alpha == alpha[0]).all()
    has_sw = isinstance(sample_weight, np.ndarray) \
        or sample_weight not in [1.0, None]

    if has_sw:
        # rescale y and K in place by sqrt(sample_weight) to absorb the weights
        sw = np.sqrt(np.atleast_1d(sample_weight))
        y = y * sw[:, np.newaxis]
        K *= np.outer(sw, sw)

    if one_alpha:
        # one shared penalty: solve for all targets at once
        K.flat[::n_samples + 1] += alpha[0]
        try:
            dual_coef = linalg.solve(K, y, sym_pos=True, overwrite_a=False)
        except np.linalg.LinAlgError:
            warnings.warn("Singular matrix in solving dual problem. Using "
                          "least-squares solution instead.")
            dual_coef = linalg.lstsq(K, y)[0]

        # K may be expensive to compute: undo the in-place modification
        K.flat[::n_samples + 1] -= alpha[0]

        if has_sw:
            # scale back so that coef = X.T @ dual_coef holds for unweighted X
            dual_coef *= sw[:, np.newaxis]

        return dual_coef
    else:
        # one penalty per target: solve target by target
        dual_coefs = np.empty([n_targets, n_samples])

        for dual_coef, target, current_alpha in zip(dual_coefs, y.T, alpha):
            K.flat[::n_samples + 1] += current_alpha
            dual_coef[:] = linalg.solve(K, target, sym_pos=True,
                                        overwrite_a=False).ravel()
            K.flat[::n_samples + 1] -= current_alpha
        if has_sw:
            dual_coefs *= sw[np.newaxis, :]

        return dual_coefs.T



_solve_cholesky_kernel:
 This function allows samples to be weighted. The only difference from _solve_cholesky is that the kernel matrix K has to be supplied.
 It also provides a fallback: when linalg.solve fails, the least-squares solution from linalg.lstsq is used instead.
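
Sample weighting in the dual works because rescaling the rows of X by sqrt(sample_weight) rescales the Gram matrix by the outer product of those square roots; a minimal check (my own data):

import numpy as np

rng = np.random.RandomState(0)
X = rng.randn(6, 3)
sw = np.sqrt(rng.rand(6))               # square roots of sample weights

K = X @ X.T
X_w = X * sw[:, np.newaxis]             # weighted design matrix
assert np.allclose(K * np.outer(sw, sw), X_w @ X_w.T)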

def _solve_svd(X, y, alpha):
    U, s, Vt = linalg.svd(X, full_matrices=False)
    idx = s > 1e-15   # same default threshold as scipy.linalg.pinv
    s_nnz = s[idx][:, np.newaxis]
    UTy = np.dot(U.T, y)
    d = np.zeros((s.size, alpha.size))
    d[idx] = s_nnz / (s_nnz ** 2 + alpha)
    d_UT_y = d * UTy
    return np.dot(Vt.T, d_UT_y).T



_solve_svd:
 This computes the same ridge solution as above, but through a singular value decomposition of X: with X = U S V^T, the coefficients are w = V diag(s / (s^2 + alpha)) U^T y.
 Singular values below the threshold 1e-15 are discarded (the same threshold scipy.linalg.pinv uses when computing the pseudo-inverse).
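
A minimal numpy check of the SVD formula against the normal equations (my own data):

import numpy as np
from scipy import linalg

rng = np.random.RandomState(0)
X = rng.randn(10, 4)
y = rng.randn(10)
alpha = 2.0

U, s, Vt = linalg.svd(X, full_matrices=False)
w_svd = Vt.T @ ((s / (s ** 2 + alpha)) * (U.T @ y))

w_ref = np.linalg.solve(X.T @ X + alpha * np.eye(4), X.T @ y)
assert np.allclose(w_svd, w_ref)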

def ridge_regression(X, y, alpha, sample_weight=None, solver='auto',
                     max_iter=None, tol=1e-3, verbose=0, random_state=None,
                     return_n_iter=False, return_intercept=False):
    if return_intercept and sparse.issparse(X) and solver != 'sag':
        if solver != 'auto':
            warnings.warn("In Ridge, only 'sag' solver can currently fit the "
                          "intercept when X is sparse. Solver has been "
                          "automatically changed into 'sag'.")
        solver = 'sag'

    # SAG needs X as CSR with C order and float64 dtype
    if solver == 'sag':
        X = check_array(X, accept_sparse=['csr'],
                        dtype=np.float64, order='C')
        y = check_array(y, dtype=np.float64, ensure_2d=False, order='F')
    else:
        X = check_array(X, accept_sparse=['csr', 'csc', 'coo'],
                        dtype=np.float64)
        y = check_array(y, dtype='numeric', ensure_2d=False)

    check_consistent_length(X, y)
    n_samples, n_features = X.shape

    if y.ndim > 2:
        raise ValueError("Target y has the wrong shape %s" % str(y.shape))

    ravel = False
    if y.ndim == 1:
        y = y.reshape(-1, 1)
        ravel = True

    n_samples_, n_targets = y.shape
    if n_samples != n_samples_:
        raise ValueError("Number of samples in X and y does not correspond:"
                         " %d != %d" % (n_samples, n_samples_))

    has_sw = sample_weight is not None

    if solver == 'auto':
        # cholesky if possible, otherwise conjugate gradient
        if not sparse.issparse(X) or has_sw:
            solver = 'cholesky'
        else:
            solver = 'sparse_cg'

    elif solver == 'lsqr' and not hasattr(sp_linalg, 'lsqr'):
        warnings.warn("lsqr not available on this machine, falling back "
                      "to sparse_cg")
        solver = 'sparse_cg'

    if has_sw:
        if np.atleast_1d(sample_weight).ndim > 1:
            raise ValueError("Sample weights must be 1D array or scalar")

        if solver != 'sag':
            # SAG supports sample_weight directly; for the other solvers
            # the weights are absorbed into X and y by rescaling
            X, y = _rescale_data(X, y, sample_weight)

    # there should be either 1 or n_targets penalties
    alpha = np.asarray(alpha).ravel()
    if alpha.size not in [1, n_targets]:
        raise ValueError("Number of targets and number of penalties "
                         "do not correspond: %d != %d"
                         % (alpha.size, n_targets))

    if alpha.size == 1 and n_targets > 1:
        alpha = np.repeat(alpha, n_targets)

    if solver not in ('sparse_cg', 'cholesky', 'svd', 'lsqr', 'sag'):
        raise ValueError('Solver %s not understood' % solver)

    n_iter = None
    if solver == 'sparse_cg':
        coef = _solve_sparse_cg(X, y, alpha, max_iter, tol, verbose)

    elif solver == 'lsqr':
        coef, n_iter = _solve_lsqr(X, y, alpha, max_iter, tol)

    elif solver == 'cholesky':
        if n_features > n_samples:
            K = safe_sparse_dot(X, X.T, dense_output=True)
            try:
                dual_coef = _solve_cholesky_kernel(K, y, alpha)
                coef = safe_sparse_dot(X.T, dual_coef, dense_output=True).T
            except linalg.LinAlgError:
                # use SVD solver if the matrix is singular
                solver = 'svd'
        else:
            try:
                coef = _solve_cholesky(X, y, alpha)
            except linalg.LinAlgError:
                # use SVD solver if the matrix is singular
                solver = 'svd'

    elif solver == 'sag':
        # one column of coefficients (plus, optionally, an intercept)
        # is fitted per target
        max_squared_sum = row_norms(X, squared=True).max()
        coef = np.empty([y.shape[1], n_features])
        n_iter = np.empty(y.shape[1], dtype=np.int32)
        intercept = np.zeros((y.shape[1],))
        for i, (alpha_i, target) in enumerate(zip(alpha, y.T)):
            init = {'coef': np.zeros((n_features + int(return_intercept), 1))}
            coef_, n_iter_, _ = sag_solver(
                X, target.ravel(), sample_weight, 'squared', alpha_i,
                max_iter, tol, verbose, random_state, False,
                max_squared_sum, init)
            if return_intercept:
                coef[i] = coef_[:-1]
                intercept[i] = coef_[-1]
            else:
                coef[i] = coef_
            n_iter[i] = n_iter_

        if intercept.shape[0] == 1:
            intercept = intercept[0]
        coef = np.asarray(coef)

    if solver == 'svd':
        if sparse.issparse(X):
            raise TypeError('SVD solver does not support sparse inputs '
                            'currently')
        coef = _solve_svd(X, y, alpha)

    if ravel:
        # y was passed as a 1-D array, so flatten the coefficients
        coef = coef.ravel()

    if return_n_iter and return_intercept:
        return coef, n_iter, intercept
    elif return_intercept:
        return coef, intercept
    elif return_n_iter:
        return coef, n_iter
    else:
        return coef



ridge_regression:
 This is just the unified interface over the concrete solvers described above.

 When the data matrix is sparse, only the stochastic average gradient solver 'sag' can currently fit the intercept.

 With solver='auto', the Cholesky solver is chosen for dense matrices or when sample weights are given; otherwise sparse_cg is used.

 When n_features > n_samples, the Cholesky path switches to the kernel (dual) formulation, using nothing more than the plain linear kernel. As the code shows, when the matrix turns out to be singular the function falls back to the SVD solver.

 The SVD solver does not support sparse input.
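
To close, a small usage sketch of the public entry point sklearn.linear_model.ridge_regression, on made-up data; every dense-capable solver above is reachable through the solver argument:

import numpy as np
from sklearn.linear_model import ridge_regression

rng = np.random.RandomState(0)
X = rng.randn(50, 3)
y = X @ np.array([1.0, -2.0, 0.5]) + 0.01 * rng.randn(50)

for solver in ('cholesky', 'svd', 'lsqr', 'sparse_cg'):
    coef = ridge_regression(X, y, alpha=1.0, solver=solver)
    print(solver, coef)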