SecurityManager is responsible for configuring and managing accounts, permissions, and authentication. When Spark is deployed on YARN, it generates the secret key itself and stores it in the Hadoop UGI. In other deployment modes, the secret key must be supplied via the environment variable _SPARK_AUTH_SECRET (which takes precedence) or the spark.authenticate.secret property. Finally, SecurityManager installs a default password Authenticator, implemented as an anonymous inner class, which is consulted whenever an HTTP client needs to obtain a user name and password from an HTTP server. Because inter-node communication in Spark often has to negotiate user names and passwords dynamically, this mechanism supports that need flexibly.
Spark supports authentication via a shared secret, enabled through the spark.authenticate parameter, which controls whether Spark's communication protocol authenticates using the shared secret. The scheme is based on a handshake: the two sides can communicate only if they hold the same shared secret, and if the secrets do not match, communication fails. The shared secret can be created as follows:
1. In the Spark on YARN deployment mode, setting spark.authenticate to true causes the shared secret to be generated and distributed automatically. Each application uses a unique shared secret.
2. In other deployment modes, the spark.authenticate.secret parameter should be configured on every node, as the sketch below illustrates. That secret is then used by all of the Masters, Workers, and applications.
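As an illustration, a non-YARN deployment might enable shared-secret authentication like this (a minimal sketch; the app name and secret value are made up, and the secret must simply match across all nodes):
import org.apache.spark.SparkConf

val conf = new SparkConf()
  .setAppName("auth-demo")                          // illustrative app name
  .set("spark.authenticate", "true")                // turn on shared-secret auth
  .set("spark.authenticate.secret", "change-me")    // same value on every node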
/*
Variable declaration
Package: org.apache.spark
Class: SparkEnv
*/
val securityManager = new SecurityManager(conf, ioEncryptionKey)
ioEncryptionKey.foreach { _ =>
  // Check whether network encryption is also enabled.
  if (!securityManager.isEncryptionEnabled()) {
    logWarning("I/O encryption enabled without RPC encryption: keys will be visible on the " +
      "wire.")
  }
}
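The warning above fires when I/O (shuffle and spill) encryption is enabled while RPC encryption is not, so the encryption keys would cross the network in the clear. A minimal sketch of the configuration combination that triggers it:
import org.apache.spark.SparkConf

val conf = new SparkConf()
  .set("spark.io.encryption.enabled", "true")   // produces an ioEncryptionKey
  .set("spark.authenticate", "true")
  // spark.network.crypto.enabled and spark.authenticate.enableSaslEncryption
  // are left at their false defaults, so isEncryptionEnabled() returns false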
/*
Variable processing
Step 1: new SecurityManager()
Package: org.apache.spark
Class: SecurityManager
*/
// This reads the spark.authenticate property, which defaults to false
private val authOn = sparkConf.get(NETWORK_AUTH_ENABLED)
// Set up password authentication for HTTP connections.
// Set our own authenticator to properly negotiate user/password for HTTP connections.
// This is needed by the HTTP client fetching from the HTTP server. It is registered
// here so that it is only set once.
// Note: spark.authenticate must be true for this to run, and if it is enabled outside
// YARN mode without spark.authenticate.secret being set, startup fails with an error.
if (authOn) {
  Authenticator.setDefault(
    // Create a password-authentication instance, overriding getPasswordAuthentication
    // to extract the user name and password from the requesting URL
    new Authenticator() {
      override def getPasswordAuthentication(): PasswordAuthentication = {
        var passAuth: PasswordAuthentication = null
        val userInfo = getRequestingURL().getUserInfo()
        if (userInfo != null) {
          val parts = userInfo.split(":", 2)
          passAuth = new PasswordAuthentication(parts(0), parts(1).toCharArray())
        }
        return passAuth
      }
    }
  )
}
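To see what the authenticator consumes, note that it reads the user-info portion of the requesting URL. A minimal, self-contained sketch (the host and credentials are illustrative):
import java.net.URL

val url = new URL("http://alice:secret@host:8080/jars/app.jar")
val userInfo = url.getUserInfo()       // "alice:secret"
val parts = userInfo.split(":", 2)     // Array("alice", "secret")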
/*
Step 2: check whether network encryption is enabled
Package: org.apache.spark
Class: SecurityManager
*/
def isEncryptionEnabled(): Boolean = {
  sparkConf.get(NETWORK_ENCRYPTION_ENABLED) || sparkConf.get(SASL_ENCRYPTION_ENABLED)
}
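NETWORK_ENCRYPTION_ENABLED and SASL_ENCRYPTION_ENABLED correspond to the spark.network.crypto.enabled and spark.authenticate.enableSaslEncryption properties; either one makes this method return true. A minimal sketch (both settings also require spark.authenticate to be on):
import org.apache.spark.SparkConf

val conf = new SparkConf()
  .set("spark.authenticate", "true")
  .set("spark.network.crypto.enabled", "true")               // NETWORK_ENCRYPTION_ENABLED
  // .set("spark.authenticate.enableSaslEncryption", "true") // SASL_ENCRYPTION_ENABLED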
SecurityManager mainly does the following things, as the code below shows:
/*
1. Read the relevant configuration
*/
private val authOn = sparkConf.get(NETWORK_AUTH_ENABLED)
private var aclsOn =
sparkConf.getBoolean("spark.acls.enable", sparkConf.getBoolean("spark.ui.acls.enable", false))
private var adminAcls: Set[String] =
stringToSet(sparkConf.get("spark.admin.acls", ""))
private var adminAclsGroups : Set[String] =
stringToSet(sparkConf.get("spark.admin.acls.groups", ""))
private var viewAcls: Set[String] = _
private var viewAclsGroups: Set[String] = _
// list of users who have permission to modify the application. This should
// apply to both UI and CLI for things like killing the application.
private var modifyAcls: Set[String] = _
private var modifyAclsGroups: Set[String] = _
// always add the current user and SPARK_USER to the viewAcls
private val defaultAclUsers = Set[String](System.getProperty("user.name", ""),
  Utils.getCurrentUserName()) // debug note: in one test run this was Set("hzjs", "root")
setViewAcls(defaultAclUsers, sparkConf.get("spark.ui.view.acls", ""))
setModifyAcls(defaultAclUsers, sparkConf.get("spark.modify.acls", ""))
setViewAclsGroups(sparkConf.get("spark.ui.view.acls.groups", ""))
setModifyAclsGroups(sparkConf.get("spark.modify.acls.groups", ""))
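For reference, these ACL properties can be set directly on a SparkConf; a minimal sketch (the user and group names are illustrative, and each list is comma-separated):
import org.apache.spark.SparkConf

val conf = new SparkConf()
  .set("spark.acls.enable", "true")
  .set("spark.admin.acls", "alice")                // admin users
  .set("spark.ui.view.acls", "bob,carol")          // may view the UI
  .set("spark.modify.acls", "alice")               // may modify/kill the app
  .set("spark.modify.acls.groups", "spark-admins") // group-based variant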
/*
2. Log the resulting security settings
*/
logInfo("SecurityManager: authentication " + (if (authOn) "enabled" else "disabled") +
"; ui acls " + (if (aclsOn) "enabled" else "disabled") +
"; users with view permissions: " + viewAcls.toString() +
"; groups with view permissions: " + viewAclsGroups.toString() +
"; users with modify permissions: " + modifyAcls.toString() +
"; groups with modify permissions: " + modifyAclsGroups.toString())
/*
3. Generate or look up the secret key (automatic in YARN mode)
*/
private val secretKey = generateSecretKey()
/**
 * Generates or looks up the secret key.
 *
 * The way the key is stored depends on the Spark deployment mode. Yarn
 * uses the Hadoop UGI.
 *
 * For non-Yarn deployments, if the config variable is not set
 * we throw an exception.
 */
private def generateSecretKey(): String = {
  if (!isAuthenticationEnabled) {
    null
  } else if (SparkHadoopUtil.get.isYarnMode) {
    // In YARN mode, the secure cookie will be created by the driver and stashed in the
    // user's credentials, where executors can get it. The check for an array of size 0
    // is because of the test code in YarnSparkHadoopUtilSuite.
    val secretKey = SparkHadoopUtil.get.getSecretKeyFromUserCredentials(SECRET_LOOKUP_KEY)
    if (secretKey == null || secretKey.length == 0) {
      logDebug("generateSecretKey: yarn mode, secret key from credentials is null")
      val rnd = new SecureRandom()
      // 256 bits by default, i.e. a 32-byte array
      val length = sparkConf.getInt("spark.authenticate.secretBitLength", 256) / JByte.SIZE
      val secret = new Array[Byte](length)
      rnd.nextBytes(secret)
      // hex-encode the random bytes and stash the cookie in the user's credentials
      val cookie = HashCodes.fromBytes(secret).toString()
      SparkHadoopUtil.get.addSecretKeyToUserCredentials(SECRET_LOOKUP_KEY, cookie)
      cookie
    } else {
      new Text(secretKey).toString
    }
  } else {
    // user must have set spark.authenticate.secret config
    // For Master/Worker, auth secret is in conf; for Executors, it is in env variable
    Option(sparkConf.getenv(SecurityManager.ENV_AUTH_SECRET))
      .orElse(sparkConf.getOption(SecurityManager.SPARK_AUTH_SECRET_CONF)) match {
      case Some(value) => value
      case None =>
        throw new IllegalArgumentException(
          "Error: a secret key must be specified via the " +
            SecurityManager.SPARK_AUTH_SECRET_CONF + " config")
    }
  }
}
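Stripped of the YARN plumbing, the key-generation step above boils down to drawing spark.authenticate.secretBitLength random bits and hex-encoding them. A minimal sketch (HashCodes.fromBytes is the Guava helper Spark uses here; newer Guava versions expose the same operation as HashCode.fromBytes):
import java.security.SecureRandom
import com.google.common.hash.HashCodes

val rnd = new SecureRandom()
val secret = new Array[Byte](256 / java.lang.Byte.SIZE)  // 32 bytes = 256 bits
rnd.nextBytes(secret)
val cookie = HashCodes.fromBytes(secret).toString()      // hex-encoded secret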
/**
* Check to see if authentication for the Spark communication protocols is enabled
* @return true if authentication is enabled, otherwise false
*/
def isAuthenticationEnabled(): Boolean = authOn
/*
4. Set up password authentication for HTTP connections
*/
// This registers the same default Authenticator shown in step 1: set our own
// authenticator to properly negotiate user/password for HTTP connections, as
// needed by the HTTP client fetching from the HTTP server; it is registered
// here so that it is only set once. Again, spark.authenticate must be true,
// and outside YARN mode a secret must also be configured or startup fails.
if (authOn) {
  Authenticator.setDefault(
    new Authenticator() {
      override def getPasswordAuthentication(): PasswordAuthentication = {
        var passAuth: PasswordAuthentication = null
        val userInfo = getRequestingURL().getUserInfo()
        if (userInfo != null) {
          val parts = userInfo.split(":", 2)
          passAuth = new PasswordAuthentication(parts(0), parts(1).toCharArray())
        }
        return passAuth
      }
    }
  )
}
/*
5. Configure SSL
*/
// Default SSL options shared by all communication modules
private val defaultSSLOptions = SSLOptions.parse(sparkConf, "spark.ssl", defaults = None)
// SSL options for the file server
val fileServerSSLOptions = getSSLOptions("fs")
// Build the sslSocketFactory and hostnameVerifier
val (sslSocketFactory, hostnameVerifier) = if (fileServerSSLOptions.enabled) {
  val trustStoreManagers =
    for (trustStore <- fileServerSSLOptions.trustStore) yield {
      val input = Files.asByteSource(fileServerSSLOptions.trustStore.get).openStream()
      try {
        val ks = KeyStore.getInstance(KeyStore.getDefaultType)
        ks.load(input, fileServerSSLOptions.trustStorePassword.get.toCharArray)
        val tmf = TrustManagerFactory.getInstance(TrustManagerFactory.getDefaultAlgorithm)
        tmf.init(ks)
        tmf.getTrustManagers
      } finally {
        input.close()
      }
    }
  // Fallback trust managers that accept any certificate (used when no trust store is set)
  lazy val credulousTrustStoreManagers = Array({
    logWarning("Using 'accept-all' trust manager for SSL connections.")
    new X509TrustManager {
      override def getAcceptedIssuers: Array[X509Certificate] = null
      override def checkClientTrusted(x509Certificates: Array[X509Certificate], s: String) {}
      override def checkServerTrusted(x509Certificates: Array[X509Certificate], s: String) {}
    }: TrustManager
  })
  require(fileServerSSLOptions.protocol.isDefined,
    "spark.ssl.protocol is required when enabling SSL connections.")
  val sslContext = SSLContext.getInstance(fileServerSSLOptions.protocol.get)
  sslContext.init(null, trustStoreManagers.getOrElse(credulousTrustStoreManagers), null)
  // Hostname verifier that accepts all hosts
  val hostVerifier = new HostnameVerifier {
    override def verify(s: String, sslSession: SSLSession): Boolean = true
  }
  (Some(sslContext.getSocketFactory), Some(hostVerifier))
} else {
  (None, None)
}
def getSSLOptions(module: String): SSLOptions = {
  val opts = SSLOptions.parse(sparkConf, s"spark.ssl.$module", Some(defaultSSLOptions))
  logDebug(s"Created SSL options for $module: $opts")
  opts
}
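As with the other settings, the per-module SSL options parsed here come from the spark.ssl.* namespace, with spark.ssl.<module> entries overriding the spark.ssl defaults. A minimal sketch for the "fs" (file server) module (the paths and passwords are illustrative):
import org.apache.spark.SparkConf

val conf = new SparkConf()
  .set("spark.ssl.enabled", "true")
  .set("spark.ssl.protocol", "TLSv1.2")                    // required when SSL is on
  .set("spark.ssl.trustStore", "/path/to/truststore.jks")
  .set("spark.ssl.trustStorePassword", "changeit")
  .set("spark.ssl.fs.enabled", "true")                     // file-server module override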