Spark2.2源码剖析——SecurityManager

  SecurityManager主要对账号、权限以及身份认证进行设置和管理。如果 Spark 的部署模式为 YARN,则需要生成 secret key(密钥)并存储到 Hadoop UGI 中。而在其他模式下,则需要通过环境变量 _SPARK_AUTH_SECRET(优先级更高)或者 spark.authenticate.secret 属性指定 secret key(密钥)。最后,SecurityManager 中设置了默认的口令认证实例 Authenticator,此实例采用匿名内部类实现,用于每次使用 HTTP client 从 HTTP 服务器获取数据时提供用户名和密码。这是由于 Spark 的节点间通信往往需要动态协商用户名、密码,这种方式灵活地支持了这种需求。

  Spark支持通过共享密钥进行认证。启用认证功能可以通过参数spark.authenticate来配置。此参数控制Spark通信协议是否使用共享密钥进行认证。这种认证方式基于握手机制,以确保通信双方都有相同的共享密钥时才能通信。如果共享密钥不一致,则双方将无法通信。可以通过以下过程来创建共享密钥:
  1.在Spark on YARN部署模式下,配置spark.authenticate为true,就可以自动产生并分发共享密钥。每个应用程序都使用唯一的共享密钥。
  2.其他部署方式下,应当在每个节点上都配置参数spark.authenticate.secret。此密钥将由所有Master、Worker及应用程序来使用。

/*
	变量声明
	包名:org.apache.spark
	类名:SparkEnv
*/
val securityManager = new SecurityManager(conf, ioEncryptionKey)
    // An I/O encryption key is only useful if the network layer is encrypted as well;
    // otherwise warn that the key will be visible on the wire.
    if (ioEncryptionKey.isDefined && !securityManager.isEncryptionEnabled()) {
      logWarning("I/O encryption enabled without RPC encryption: keys will be visible on the " +
        "wire.")
    }
/*
	变量处理
	第一步:new SecurityManager()
	包名:org.apache.spark
	类名:SecurityManager
*/
  // Whether authentication is enabled, read from the NETWORK_AUTH_ENABLED config entry
  // (per the original note this is the spark.authenticate property, false by default).
  private val authOn = sparkConf.get(NETWORK_AUTH_ENABLED)

  // Install a JVM-wide default Authenticator so HTTP clients can obtain the user name and
  // password embedded in the requested URL ("user:password@host"). It is installed here so
  // that it is set only once, and only when authentication is enabled.
  // Per the original author's note: enabling spark.authenticate outside YARN mode fails at
  // start-up unless a secret is configured as well.
  if (authOn) {
    Authenticator.setDefault(
      new Authenticator() {
        /**
         * Extracts credentials from the user-info part of the requesting URL.
         *
         * @return the user name / password pair, or null when the URL is unavailable, carries
         *         no user-info, or the user-info has no ":"-separated password (previously the
         *         last case threw ArrayIndexOutOfBoundsException)
         */
        override def getPasswordAuthentication(): PasswordAuthentication = {
          val url = getRequestingURL()
          val userInfo = if (url == null) null else url.getUserInfo()
          // Limit 2 keeps any further ':' characters inside the password.
          Option(userInfo).map(_.split(":", 2)) match {
            case Some(Array(user, password)) =>
              new PasswordAuthentication(user, password.toCharArray())
            case _ =>
              // null is the Authenticator API's way of saying "no credentials available".
              null
          }
        }
      }
    )
  }

/*
	第二步:检查是否启动网络加密
	包名:org.apache.spark
	类名:SecurityManager
*/
  /**
   * Reports whether network traffic is encrypted.
   *
   * @return true when either the NETWORK_ENCRYPTION_ENABLED or the SASL_ENCRYPTION_ENABLED
   *         config entry is true; the second entry is only consulted when the first is false
   */
  def isEncryptionEnabled(): Boolean = {
    if (sparkConf.get(NETWORK_ENCRYPTION_ENABLED)) true
    else sparkConf.get(SASL_ENCRYPTION_ENABLED)
  }

SecurityManager主要做了以下几件事,代码如下:

/*
	1、配置相关信息
*/
// Whether authentication is enabled (NETWORK_AUTH_ENABLED config entry).
private val authOn: Boolean = sparkConf.get(NETWORK_AUTH_ENABLED)

// ACL checking: spark.acls.enable, with spark.ui.acls.enable as its default, and false
// when neither is set.
private var aclsOn: Boolean = {
  val uiAclsEnabled = sparkConf.getBoolean("spark.ui.acls.enable", false)
  sparkConf.getBoolean("spark.acls.enable", uiAclsEnabled)
}

// Admin users and admin groups, parsed from their config strings by stringToSet.
private var adminAcls: Set[String] = stringToSet(sparkConf.get("spark.admin.acls", ""))
private var adminAclsGroups: Set[String] =
  stringToSet(sparkConf.get("spark.admin.acls.groups", ""))

// Users / groups with view permission; assigned by the setViewAcls* calls below.
private var viewAcls: Set[String] = _
private var viewAclsGroups: Set[String] = _

// Users / groups allowed to modify the application. This should apply to both the UI and
// the CLI, e.g. for killing the application. Assigned by the setModifyAcls* calls below.
private var modifyAcls: Set[String] = _
private var modifyAclsGroups: Set[String] = _

// The JVM's "user.name" and the current Spark user are always passed to the ACL setters.
// (Debug note from the original author: in their run this was a two-element set,
// "hzjs" and "root".)
private val defaultAclUsers: Set[String] =
  Set[String](System.getProperty("user.name", ""), Utils.getCurrentUserName())

setViewAcls(defaultAclUsers, sparkConf.get("spark.ui.view.acls", ""))
setModifyAcls(defaultAclUsers, sparkConf.get("spark.modify.acls", ""))

setViewAclsGroups(sparkConf.get("spark.ui.view.acls.groups", ""))
setModifyAclsGroups(sparkConf.get("spark.modify.acls.groups", ""))

/*
	2、记录输出相关日志信息
*/
// Log a one-line summary of the authentication and ACL state. The locals live inside the
// argument block, so they add no members to the enclosing class.
logInfo {
  val authState = if (authOn) "enabled" else "disabled"
  val aclState = if (aclsOn) "enabled" else "disabled"
  "SecurityManager: authentication " + authState +
    "; ui acls " + aclState +
    "; users  with view permissions: " + viewAcls.toString() +
    "; groups with view permissions: " + viewAclsGroups.toString() +
    "; users  with modify permissions: " + modifyAcls.toString() +
    "; groups with modify permissions: " + modifyAclsGroups.toString()
}

/*
	3、在Yarn模式下生成key
*/
private val secretKey = generateSecretKey()

/**
 * Generates or looks up the shared authentication secret.
 *
 * How the key is obtained depends on the deployment mode:
 *  - authentication disabled: no key is needed and null is returned;
 *  - YARN mode: the key stored in the user's credentials under SECRET_LOOKUP_KEY is reused;
 *    when none is stored (null or empty) a random key is generated, stored in the
 *    credentials and returned;
 *  - any other mode: the ENV_AUTH_SECRET environment variable is used if set, otherwise
 *    the SPARK_AUTH_SECRET_CONF config entry.
 *
 * @return the secret key, or null when authentication is disabled
 * @throws IllegalArgumentException outside YARN mode when neither the environment variable
 *                                  nor the config entry supplies a secret
 */
  private def generateSecretKey(): String = {
    if (isAuthenticationEnabled) {
      if (SparkHadoopUtil.get.isYarnMode) {
        // In YARN mode, the secure cookie will be created by the driver and stashed in the
        // user's credentials, where executors can get it. The check for an array of size 0
        // is because of the test code in YarnSparkHadoopUtilSuite.
        val storedKey = SparkHadoopUtil.get.getSecretKeyFromUserCredentials(SECRET_LOOKUP_KEY)
        val hasStoredKey = storedKey != null && storedKey.length != 0
        if (hasStoredKey) {
          new Text(storedKey).toString
        } else {
          logDebug("generateSecretKey: yarn mode, secret key from credentials is null")
          val rnd = new SecureRandom()
          // Key size is configured in bits; the original author observed 32 bytes with the
          // default of 256 bits.
          val byteCount = sparkConf.getInt("spark.authenticate.secretBitLength", 256) / JByte.SIZE
          val secretBytes = new Array[Byte](byteCount)
          rnd.nextBytes(secretBytes)

          // The random bytes are rendered as a string and stored so later lookups find it.
          val cookie = HashCodes.fromBytes(secretBytes).toString()
          SparkHadoopUtil.get.addSecretKeyToUserCredentials(SECRET_LOOKUP_KEY, cookie)
          cookie
        }
      } else {
        // user must have set spark.authenticate.secret config
        // For Master/Worker, auth secret is in conf; for Executors, it is in env variable
        val fromEnv = Option(sparkConf.getenv(SecurityManager.ENV_AUTH_SECRET))
        fromEnv.orElse(sparkConf.getOption(SecurityManager.SPARK_AUTH_SECRET_CONF)).getOrElse {
          throw new IllegalArgumentException(
            "Error: a secret key must be specified via the " +
              SecurityManager.SPARK_AUTH_SECRET_CONF + " config")
        }
      }
    } else {
      null
    }
  }

/**
 * Tells whether authentication for the Spark communication protocols is switched on.
 *
 * @return the value of `authOn`: true if authentication is enabled, otherwise false
 */
  def isAuthenticationEnabled(): Boolean = {
    authOn
  }

/*
	4、使用HTTP连接设置口令认证
*/
// Install a JVM-wide default Authenticator so HTTP clients can obtain the user name and
  // password embedded in the requested URL ("user:password@host"). It is installed here so
  // that it is set only once, and only when authentication is enabled.
  // Per the original author's note: enabling spark.authenticate outside YARN mode fails at
  // start-up unless a secret is configured as well.
  if (authOn) {
    Authenticator.setDefault(
      new Authenticator() {
        /**
         * Extracts credentials from the user-info part of the requesting URL.
         *
         * @return the user name / password pair, or null when the URL is unavailable, carries
         *         no user-info, or the user-info has no ":"-separated password (previously the
         *         last case threw ArrayIndexOutOfBoundsException)
         */
        override def getPasswordAuthentication(): PasswordAuthentication = {
          val url = getRequestingURL()
          val userInfo = if (url == null) null else url.getUserInfo()
          // Limit 2 keeps any further ':' characters inside the password.
          Option(userInfo).map(_.split(":", 2)) match {
            case Some(Array(user, password)) =>
              new PasswordAuthentication(user, password.toCharArray())
            case _ =>
              // null is the Authenticator API's way of saying "no credentials available".
              null
          }
        }
      }
    )
  }

/*
	5、配置SSL
*/
// Default SSL settings, parsed from the "spark.ssl" namespace with no further defaults.
private val defaultSSLOptions = SSLOptions.parse(sparkConf, "spark.ssl", defaults = None)

// SSL settings for the file server ("fs" module), obtained through getSSLOptions.
val fileServerSSLOptions = getSSLOptions("fs")

// Socket factory and hostname verifier for file-server connections.
// Both are None when SSL is disabled for the file server.
val (sslSocketFactory, hostnameVerifier) = if (fileServerSSLOptions.enabled) {
  // Trust managers backed by the configured trust store, if one is configured.
  val trustStoreManagers = fileServerSSLOptions.trustStore.map { trustStoreFile =>
    val input = Files.asByteSource(trustStoreFile).openStream()

    try {
      val ks = KeyStore.getInstance(KeyStore.getDefaultType)
      // NOTE(review): `.get` fails if a trust store is configured without a password
      // (presumably trustStorePassword is an Option) — confirm that is intended.
      ks.load(input, fileServerSSLOptions.trustStorePassword.get.toCharArray)

      val tmf = TrustManagerFactory.getInstance(TrustManagerFactory.getDefaultAlgorithm)
      tmf.init(ks)
      tmf.getTrustManagers
    } finally {
      input.close()
    }
  }

  // SECURITY: fallback used when no trust store is configured; it accepts every
  // certificate. Kept lazy so the warning is logged only if the fallback is really used.
  lazy val credulousTrustStoreManagers = Array({
    logWarning("Using 'accept-all' trust manager for SSL connections.")
    new X509TrustManager {
      // The X509TrustManager contract requires a non-null (possibly empty) array here.
      override def getAcceptedIssuers: Array[X509Certificate] = Array.empty[X509Certificate]

      override def checkClientTrusted(
          x509Certificates: Array[X509Certificate],
          s: String): Unit = {}

      override def checkServerTrusted(
          x509Certificates: Array[X509Certificate],
          s: String): Unit = {}
    }: TrustManager
  })

  require(fileServerSSLOptions.protocol.isDefined,
    "spark.ssl.protocol is required when enabling SSL connections.")

  val sslContext = SSLContext.getInstance(fileServerSSLOptions.protocol.get)
  sslContext.init(null, trustStoreManagers.getOrElse(credulousTrustStoreManagers), null)

  // SECURITY: this verifier accepts any host name, i.e. host name checking is disabled.
  val hostVerifier = new HostnameVerifier {
    override def verify(s: String, sslSession: SSLSession): Boolean = true
  }

  (Some(sslContext.getSocketFactory), Some(hostVerifier))
} else {
  (None, None)
}


/**
 * Builds the SSL options for one module.
 *
 * The options are parsed from the "spark.ssl.<module>" namespace with `defaultSSLOptions`
 * passed as the defaults, and the result is logged at debug level.
 *
 * @param module module name used as the namespace suffix, e.g. "fs"
 * @return the parsed SSL options for that module
 */
def getSSLOptions(module: String): SSLOptions = {
  val opts = SSLOptions.parse(
    sparkConf,
    s"spark.ssl.$module",
    defaults = Some(defaultSSLOptions))
  logDebug(s"Created SSL options for $module: $opts")
  opts
}

你可能感兴趣的:(大数据,Spark)