Configuration parameters relevant to the Spark HistoryServer:
spark.eventLog.enabled   : Whether event logging is enabled.
spark.eventLog.compress  : Whether to compress logged events.
spark.eventLog.overwrite : Whether to overwrite any existing files.
spark.eventLog.dir       : Path to the directory in which events are logged.
spark.eventLog.buffer.kb : Buffer size to use when writing to output streams.
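Before going into the code, a quick sketch of how these properties are typically set: programmatically on a SparkConf as below, or equivalently in conf/spark-defaults.conf when submitting with spark-submit. The master, app name and local log directory here are placeholder values; the example later in this post logs to hdfs://hadoop000:8020/directory instead.

import org.apache.spark.{SparkConf, SparkContext}

// Minimal sketch (not from the original post): enable event logging so that the
// HistoryServer can later replay this application.
object EventLogConfExample {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
      .setMaster("local[2]")                            // placeholder master
      .setAppName("event-log-demo")                     // placeholder app name
      .set("spark.eventLog.enabled", "true")            // turn on event logging
      .set("spark.eventLog.dir", "/tmp/spark-events")   // where EVENT_LOG_x etc. are written
      .set("spark.eventLog.compress", "false")          // "true" also produces COMPRESSION_CODEC_xxx

    val sc = new SparkContext(conf)
    sc.parallelize(1 to 100).count()                    // run something so that events are generated
    sc.stop()                                           // creates APPLICATION_COMPLETE on clean shutdown
  }
}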
The Spark HistoryServer involves two parts: the Spark application recording event log information while it runs, and the HistoryServer querying that runtime information for display. The two parts are covered separately below.
1. Recording event log information while the Spark application runs
During SparkContext startup, the EventLoggingListener and the LiveListenerBus are created and started:
SparkContext {
  private[spark] val isEventLogEnabled = conf.getBoolean("spark.eventLog.enabled", false)

  private[spark] val eventLogDir: Option[String] = {
    if (isEventLogEnabled) {
      Some(conf.get("spark.eventLog.dir", EventLoggingListener.DEFAULT_LOG_DIR).stripSuffix("/"))
    } else {
      None
    }
  }

  private[spark] val listenerBus = new LiveListenerBus

  private[spark] val env = SparkEnv.create(....listenerBus...)
  SparkEnv.set(env)

  // Optionally log Spark events
  private[spark] val eventLogger: Option[EventLoggingListener] = {
    if (isEventLogEnabled) {
      // the listener that records events
      val logger = new EventLoggingListener(applicationId, eventLogDir.get, conf, hadoopConfiguration)
      logger.start()
      listenerBus.addListener(logger)  // register the EventLoggingListener on the bus
      Some(logger)
    } else None
  }

  // At this point, all relevant SparkListeners have been registered, so begin releasing events
  listenerBus.start()

  def stop() = {
    postApplicationEnd()
    ui.foreach(_.stop())
    val dagSchedulerCopy = dagScheduler
    dagScheduler = null
    if (dagSchedulerCopy != null) {
      ...
      SparkEnv.set(null)
      listenerBus.stop()
      eventLogger.foreach(_.stop())  // creates the APPLICATION_COMPLETE file under this application's log directory
      logInfo("Successfully stopped SparkContext")
    } else {
      logInfo("SparkContext already stopped")
    }
  }
}
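Because the event logger is registered on the LiveListenerBus like any other SparkListener, a user-defined listener added via sc.addSparkListener sees the same stream of events that ends up in the event log. A minimal sketch (the listener class and its println messages are made up for illustration):

import org.apache.spark.scheduler.{SparkListener, SparkListenerApplicationEnd, SparkListenerJobEnd, SparkListenerJobStart}

// Hypothetical listener: receives the same events that EventLoggingListener writes to disk.
class ConsoleEventListener extends SparkListener {
  override def onJobStart(jobStart: SparkListenerJobStart): Unit =
    println(s"job ${jobStart.jobId} started")

  override def onJobEnd(jobEnd: SparkListenerJobEnd): Unit =
    println(s"job ${jobEnd.jobId} finished: ${jobEnd.jobResult}")

  override def onApplicationEnd(end: SparkListenerApplicationEnd): Unit =
    println(s"application ended at ${end.time}")
}

// Registration goes through the same LiveListenerBus shown above:
//   sc.addSparkListener(new ConsoleEventListener)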
// A SparkListener that logs events to persistent storage.
class EventLoggingListener extends SparkListener {

  protected val logger = new FileLogger(logDir, sparkConf, hadoopConf, outputBufferSize,
    shouldCompress, shouldOverwrite, Some(LOG_FILE_PERMISSIONS))

  def start() {
    logger.start()  // creates the log directory: spark.eventLog.dir + "/" + applicationId
    logInfo("Logging events to %s".format(logDir))
    if (shouldCompress) {  // shouldCompress = sparkConf.getBoolean("spark.eventLog.compress", false)
      val codec = sparkConf.get("spark.io.compression.codec", CompressionCodec.DEFAULT_COMPRESSION_CODEC)
      logger.newFile(COMPRESSION_CODEC_PREFIX + codec)    // creates file COMPRESSION_CODEC_xxx (e.g. snappy)
    }
    logger.newFile(SPARK_VERSION_PREFIX + SPARK_VERSION)  // creates file SPARK_VERSION_xxx (e.g. 1.2.0-SNAPSHOT)
    logger.newFile(LOG_PREFIX + logger.fileIndex)         // creates file EVENT_LOG_x (x increments from 0)
  }

  // Log the event as JSON
  private def logEvent(event: SparkListenerEvent, flushLogger: Boolean = false) {
    val eventJson = JsonProtocol.sparkEventToJson(event)
    logger.logLine(compact(render(eventJson)))
    if (flushLogger) {
      logger.flush()
    }
    if (testing) {
      loggedEvents += eventJson
    }
  }

  def stop() = {
    logger.newFile(APPLICATION_COMPLETE)  // val APPLICATION_COMPLETE = "APPLICATION_COMPLETE"
    logger.stop()
  }
}
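logEvent turns every event into one line of JSON via json4s (compact(render(...))) and appends it to EVENT_LOG_x. The standalone sketch below imitates that one-JSON-object-per-line format; the event names, fields and output path are invented for illustration and are not the exact layout produced by JsonProtocol:

import java.io.PrintWriter
import org.json4s.JsonDSL._
import org.json4s.jackson.JsonMethods.{compact, render}

// Sketch: write one JSON object per line, the same shape of output that
// EventLoggingListener.logEvent produces (field names here are made up).
object JsonLineLogger {
  def main(args: Array[String]): Unit = {
    val writer = new PrintWriter("/tmp/EVENT_LOG_demo")   // placeholder path
    val events = Seq(
      ("Event" -> "DemoJobStart") ~ ("Job ID" -> 0),
      ("Event" -> "DemoJobEnd")   ~ ("Job ID" -> 0) ~ ("Result" -> "JobSucceeded")
    )
    events.foreach(e => writer.println(compact(render(e))))
    writer.close()
  }
}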
From this code we can see that each application's log directory contains the following files:
1) EVENT_LOG_x: holds the event log entries generated while the application runs;
2) SPARK_VERSION_xxx: xxx is the Spark version, e.g. 1.2.0-SNAPSHOT; the file itself is empty;
3) COMPRESSION_CODEC_xxx: present only when compression is configured; xxx is the codec, e.g. snappy;
4) APPLICATION_COMPLETE: created only after SparkContext has stopped cleanly; this file is also empty.
For example, with spark.eventLog.compress not configured, there are only three files:
[spark@hadoop000 conf]$ hadoop fs -ls hdfs://hadoop000:8020/directory/app-20141111141629-0005
-rwxrwx---   1 spark supergroup      0 2014-11-11 14:21 hdfs://hadoop000:8020/directory/app-20141111141629-0005/APPLICATION_COMPLETE
-rwxrwx---   1 spark supergroup  38197 2014-11-11 14:21 hdfs://hadoop000:8020/directory/app-20141111141629-0005/EVENT_LOG_1
-rwxrwx---   1 spark supergroup      0 2014-11-11 14:16 hdfs://hadoop000:8020/directory/app-20141111141629-0005/SPARK_VERSION_1.2.0-SNAPSHOT
// A generic class for logging information to file.
class FileLogger {

  def start() {
    createLogDir()  // creates the directory for this applicationId
  }

  private def createLogDir() {
    val path = new Path(logDir)  // logDir = <spark.eventLog.dir>/<applicationId>
    if (fileSystem.exists(path)) {
      if (overwrite) {  // overwrite comes from spark.eventLog.overwrite (default false)
        logWarning("Log directory %s already exists. Overwriting...".format(logDir))
        fileSystem.delete(path, true)
      } else {
        throw new IOException("Log directory %s already exists!".format(logDir))
      }
    }
    if (!fileSystem.mkdirs(path)) {
      throw new IOException("Error in creating log directory: %s".format(logDir))
    }
    if (dirPermissions.isDefined) {
      val fsStatus = fileSystem.getFileStatus(path)
      if (fsStatus.getPermission.toShort != dirPermissions.get.toShort) {
        fileSystem.setPermission(path, dirPermissions.get)
      }
    }
  }

  def logLine(msg: String, withTime: Boolean = false) = log(msg + "\n", withTime)

  def log(msg: String, withTime: Boolean = false) {
    val writeInfo = if (!withTime) {
      msg
    } else {
      val date = new Date(System.currentTimeMillis)
      dateFormat.get.format(date) + ": " + msg
    }
    writer.foreach(_.print(writeInfo))
  }

  private var writer: Option[PrintWriter] = None

  def flush() {
    writer.foreach(_.flush())
    hadoopDataStream.foreach(hadoopFlushMethod.invoke(_))
  }
}
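FileLogger works directly against the Hadoop FileSystem API. A small standalone sketch of the same create-or-overwrite logic follows; the path and permission value are placeholders, and this helper is not the Spark class itself:

import java.io.IOException
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.hadoop.fs.permission.FsPermission

// Sketch of FileLogger.createLogDir-style behaviour: create a per-application
// log directory, optionally overwriting an existing one.
object LogDirSketch {
  def createLogDir(logDir: String, overwrite: Boolean, hadoopConf: Configuration): Unit = {
    val path = new Path(logDir)
    val fs = FileSystem.get(path.toUri, hadoopConf)

    if (fs.exists(path)) {
      if (overwrite) {
        fs.delete(path, true)   // recursive delete, like the overwrite branch above
      } else {
        throw new IOException(s"Log directory $logDir already exists!")
      }
    }
    if (!fs.mkdirs(path)) {
      throw new IOException(s"Error in creating log directory: $logDir")
    }
    // roughly what LOG_FILE_PERMISSIONS-style setup does (mode 770)
    fs.setPermission(path, new FsPermission(Integer.parseInt("770", 8).toShort))
  }

  def main(args: Array[String]): Unit = {
    createLogDir("file:///tmp/spark-events/app-demo-0001", overwrite = true, new Configuration())
  }
}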
Summary:
1) While SparkContext starts up, it creates the event log listener (EventLoggingListener);
2) The listener creates one log directory per application, at spark.eventLog.dir + "/" + applicationId;
3) Each application directory contains the following files:
a) APPLICATION_COMPLETE (created only after SparkContext.stop() has run)
b) EVENT_LOG_x
c) SPARK_VERSION_xxx
d) COMPRESSION_CODEC_xxx (present only when spark.eventLog.compress is configured)
4) While the application runs, each triggered event is appended as a log entry to EVENT_LOG_x.
2. The HistoryServer querying Spark application runtime information
HistoryServer {
  // Initialize the HistoryServer and start a background thread that periodically
  // syncs Spark event logs from the configured directory so the UI can display them.
  def main(argStrings: Array[String]) {
    val providerName = conf.getOption("spark.history.provider")
      .getOrElse(classOf[FsHistoryProvider].getName())
    val provider = Class.forName(providerName).......  // loads all qualifying applications from the configured HDFS path
  }

  def initialize() {
    attachPage(new HistoryPage(this))  // builds the application list shown on the HistoryServer main page

    // For Jetty details see the separate note on the HistoryServer's Jetty usage.
    val contextHandler = new ServletContextHandler
    contextHandler.setContextPath(HistoryServer.UI_PATH_PREFIX)        // UI_PATH_PREFIX = "/history"
    contextHandler.addServlet(new ServletHolder(loaderServlet), "/*")  // requests under contextPath are handled by loaderServlet
    attachHandler(contextHandler)
  }

  private val loaderServlet = new HttpServlet {
    protected override def doGet(req: HttpServletRequest, res: HttpServletResponse): Unit = {
      val parts = Option(req.getPathInfo()).getOrElse("").split("/")
      val appId = parts(1)
      appCache.get(appId)
      res.sendRedirect(res.encodeRedirectURL(req.getRequestURI()))
    }
  }

  private val appCache = CacheBuilder.newBuilder().maximumSize(retainedApplications)
    .removalListener(new RemovalListener[String, SparkUI] {
      override def onRemoval(rm: RemovalNotification[String, SparkUI]) = {
        // when more apps are cached than can be retained, the oldest entries are evicted
        detachSparkUI(rm.getValue())
      }
    }).build(appLoader)

  private val appLoader = new CacheLoader[String, SparkUI] {
    override def load(key: String): SparkUI = {
      // load everything that needs to be displayed for the given appId
      val ui = provider.getAppUI(key).getOrElse(throw new NoSuchElementException())
      attachSparkUI(ui)
      ui
    }
  }
}
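appCache is a plain Guava loading cache: entries are built on demand by the CacheLoader, evicted once maximumSize is exceeded, and cleaned up by the RemovalListener. The self-contained sketch below shows the same pattern with strings standing in for the appId -> SparkUI mapping:

import com.google.common.cache.{CacheBuilder, CacheLoader, RemovalListener, RemovalNotification}

// Sketch of the appCache pattern: load-on-miss, bounded size, cleanup on eviction.
object AppCacheSketch {
  def main(args: Array[String]): Unit = {
    val loader = new CacheLoader[String, String] {
      override def load(appId: String): String = {
        println(s"loading UI for $appId")       // stands in for provider.getAppUI + attachSparkUI
        s"ui-for-$appId"
      }
    }

    val cache = CacheBuilder.newBuilder()
      .maximumSize(2)                           // stands in for retainedApplications
      .removalListener(new RemovalListener[String, String] {
        override def onRemoval(rm: RemovalNotification[String, String]): Unit =
          println(s"evicting ${rm.getValue}")   // stands in for detachSparkUI
      })
      .build(loader)

    Seq("app-1", "app-2", "app-3", "app-1").foreach(id => println(cache.get(id)))
  }
}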
FsHistoryProvider {
  // Interval between scans of the log directory
  private val UPDATE_INTERVAL_MS = conf.getInt("spark.history.fs.updateInterval",
    conf.getInt("spark.history.updateInterval", 10)) * 1000

  // Directory that holds the event log files
  private val logDir = conf.get("spark.history.fs.logDirectory", null)
  private val resolvedLogDir = Option(logDir)
    .map { d => Utils.resolveURI(d) }
    .getOrElse { throw new IllegalArgumentException("Logging directory must be specified.") }

  // Applications, sorted by end time in descending order
  private var applications: mutable.LinkedHashMap[String, FsApplicationHistoryInfo] =
    new mutable.LinkedHashMap()

  // After initialize() runs, the applications map holds everything the pages need to display:
  // mutable.LinkedHashMap[String, FsApplicationHistoryInfo]
  def initialize() {
    val path = new Path(resolvedLogDir)

    checkForLogs() {
      // Load the most recently modified log directories whose application status is
      // APPLICATION_COMPLETE, sorted by the application's end time in descending order
      logDirs.filter(...).flatMap({
        val (replayBus, appListener) = createReplayBus(dir)
        replayBus.replay()
      }... new FsApplicationHistoryInfo()...).sortBy(-info.endTime)

      // Merge the newly found application logs into the existing list, keeping the descending order
      val newApps = new mutable.LinkedHashMap[String, FsApplicationHistoryInfo]()
      def addIfAbsent(info: FsApplicationHistoryInfo) = {
        if (!newApps.contains(info.id)) {
          newApps += (info.id -> info)
        }
      }
      ......
      applications = newApps
    }

    logCheckingThread.setDaemon(true)
    logCheckingThread.start()  // start the background thread that calls checkForLogs to pick up new applications
  }

  private val logCheckingThread = new Thread("LogCheckingThread") {
    override def run() = Utils.logUncaughtExceptions {
      while (true) {
        val now = getMonotonicTimeMs()
        if (now - lastLogCheckTimeMs > UPDATE_INTERVAL_MS) {
          Thread.sleep(UPDATE_INTERVAL_MS)
        } else {
          Thread.sleep(lastLogCheckTimeMs + UPDATE_INTERVAL_MS - now)
        }
        checkForLogs()
      }
    }
  }

  // These fields are exactly what the HistoryServer main page lists for each application
  private class FsApplicationHistoryInfo(
      val logDir: String,
      id: String,
      name: String,
      startTime: Long,
      endTime: Long,
      lastUpdated: Long,
      sparkUser: String)
    extends ApplicationHistoryInfo(id, name, startTime, endTime, lastUpdated, sparkUser)
}
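logCheckingThread is simply a daemon thread that wakes up roughly every UPDATE_INTERVAL_MS and rescans the log directory. A stripped-down sketch of that polling pattern follows; the directory, interval and the trivial checkForLogs body are placeholders:

import java.io.File

// Sketch of the FsHistoryProvider polling pattern: a daemon thread that rescans
// a directory at a fixed interval.
object LogPollingSketch {
  val updateIntervalMs = 10 * 1000L
  val logDir = new File("/tmp/spark-events")      // hypothetical event log directory

  def checkForLogs(): Unit = {
    val dirs = Option(logDir.listFiles()).getOrElse(Array.empty[File])
    println(s"found ${dirs.length} application log directories")
  }

  def main(args: Array[String]): Unit = {
    val logCheckingThread = new Thread("LogCheckingThread") {
      override def run(): Unit = {
        while (true) {
          checkForLogs()
          Thread.sleep(updateIntervalMs)
        }
      }
    }
    logCheckingThread.setDaemon(true)
    logCheckingThread.start()

    Thread.sleep(35 * 1000L)   // keep the JVM alive long enough to see a few scans
  }
}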
Once replay() has run, the event logs of every qualifying application are available, and the various XXXPage classes then take over to render them.
// A SparkListenerBus that replays logged events from persisted storage.
class ReplayListenerBus {

  // Replay each event in the order maintained in the given logs. This should only be called exactly once.
  def replay() {
    logPaths.foreach { path =>
      var fileStream: Option[InputStream] = None
      var bufferedStream: Option[InputStream] = None
      var compressStream: Option[InputStream] = None
      var currentLine = "<not started>"
      try {
        fileStream = Some(fileSystem.open(path))
        bufferedStream = Some(new BufferedInputStream(fileStream.get))
        compressStream = Some(wrapForCompression(bufferedStream.get))

        // Parse each line as an event and post the event to all attached listeners
        val lines = Source.fromInputStream(compressStream.get).getLines()
        lines.foreach { line =>
          currentLine = line
          postToAll(JsonProtocol.sparkEventFromJson(parse(line)))
        }
      } catch {
        case e: Exception =>
          logError("Exception in parsing Spark event log %s".format(path), e)
          logError("Malformed line: %s\n".format(currentLine))
      } finally {
        fileStream.foreach(_.close())
        bufferedStream.foreach(_.close())
        compressStream.foreach(_.close())
      }
    }
    replayed = true
  }
}
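So replay boils down to: open EVENT_LOG_x, read it line by line, parse each line as JSON, and post the event to the listeners. A minimal sketch of that read-and-parse loop using json4s directly is shown below; the file path is a placeholder, and real replay goes through JsonProtocol.sparkEventFromJson instead of extracting fields by hand:

import scala.io.Source
import org.json4s._
import org.json4s.jackson.JsonMethods.parse

// Sketch of the replay loop: read a JSON-lines event log and print the event type of each entry.
object ReplaySketch {
  def main(args: Array[String]): Unit = {
    implicit val formats: Formats = DefaultFormats
    val source = Source.fromFile("/tmp/EVENT_LOG_demo")   // hypothetical event log file
    try {
      source.getLines().foreach { line =>
        val json = parse(line)
        val eventType = (json \ "Event").extractOpt[String].getOrElse("<unknown>")
        println(eventType)
      }
    } finally {
      source.close()
    }
  }
}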
The rendering of the individual pages looks like this:
WebUI {
  // Calls each page's render method to produce the page. WebUIPage subclasses include
  // HistoryPage, ExecutorsPage, ApplicationPage, EnvironmentPage and the other page-level classes.
  def attachPage(page: WebUIPage) {
    val pagePath = "/" + page.prefix
    attachHandler(createServletHandler(pagePath,
      (request: HttpServletRequest) => page.render(request), securityManager, basePath))
    attachHandler(createServletHandler(pagePath.stripSuffix("/") + "/json",
      (request: HttpServletRequest) => page.renderJson(request), securityManager, basePath))
  }
}
HistoryPage {
  def render(request: HttpServletRequest): Seq[Node] = {
    // Fetch all applications; the call chain is:
    // parent.getApplicationList() -> HistoryServer.getApplicationList() -> FsHistoryProvider.getListing() -> applications.values
    val allApps = parent.getApplicationList()
    // Pagination ......
    val apps = ...                                     // the applications to show on the current page
    val appTable = UIUtils.listingTable(appHeader, appRow, apps)
    val content = ...                                  // the assembled markup for the applications shown on this page
    UIUtils.basicSparkPage(content, "History Server")  // UIUtils builds the final HTML for the page
  }

  // Table header of the main page
  private val appHeader = Seq("App ID", "App Name", "Started", "Completed", "Duration", "Spark User", "Last Updated")

  // Builds one table row per application
  private def appRow(info: ApplicationHistoryInfo): Seq[Node] = {
    val uiAddress = HistoryServer.UI_PATH_PREFIX + s"/${info.id}"
    val startTime = UIUtils.formatDate(info.startTime)
    val endTime = UIUtils.formatDate(info.endTime)
    val duration = UIUtils.formatDuration(info.endTime - info.startTime)
    val lastUpdated = UIUtils.formatDate(info.lastUpdated)
    <tr>
      <td><a href={uiAddress}>{info.id}</a></td>
      <td>{info.name}</td>
      <td>{startTime}</td>
      <td>{endTime}</td>
      <td>{duration}</td>
      <td>{info.sparkUser}</td>
      <td>{lastUpdated}</td>
    </tr>
  }
}
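listingTable pairs a header sequence with a per-row function that returns Scala XML nodes. The sketch below mimics that pattern with a simplified record type and table helper (neither is the Spark class) to show how appHeader and appRow combine into a complete table:

import scala.xml.Node

// Sketch of the header + row-function + listingTable pattern used by HistoryPage.
object ListingTableSketch {
  case class AppInfo(id: String, name: String, sparkUser: String)

  def listingTable[T](headers: Seq[String], makeRow: T => Seq[Node], data: Iterable[T]): Seq[Node] = {
    <table class="table table-bordered table-striped">
      <thead><tr>{headers.map(h => <th>{h}</th>)}</tr></thead>
      <tbody>{data.flatMap(makeRow)}</tbody>
    </table>
  }

  private val appHeader = Seq("App ID", "App Name", "Spark User")

  private def appRow(info: AppInfo): Seq[Node] =
    <tr>
      <td><a href={"/history/" + info.id}>{info.id}</a></td>
      <td>{info.name}</td>
      <td>{info.sparkUser}</td>
    </tr>

  def main(args: Array[String]): Unit = {
    val apps = Seq(AppInfo("app-0001", "demo", "spark"))
    println(listingTable(appHeader, appRow, apps))
  }
}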
SparkUI(
    val sc: SparkContext,
    val conf: SparkConf,
    val securityManager: SecurityManager,
    val listenerBus: SparkListenerBus,
    var appName: String,
    val basePath: String = "")
  extends WebUI(securityManager, SparkUI.getUIPort(conf), conf, basePath, "SparkUI") {

  // Initialize all the tabs and handlers that the UI displays
  def initialize() {
    listenerBus.addListener(storageStatusListener)
    val jobProgressTab = new JobProgressTab(this)
    attachTab(jobProgressTab)
    attachTab(new StorageTab(this))
    attachTab(new EnvironmentTab(this))
    attachTab(new ExecutorsTab(this))
    attachHandler(createStaticHandler(SparkUI.STATIC_RESOURCE_DIR, "/static"))
    attachHandler(createRedirectHandler("/", "/stages", basePath = basePath))
    attachHandler(createRedirectHandler("/stages/stage/kill", "/stages", jobProgressTab.handleKillRequest))
    if (live) {
      sc.env.metricsSystem.getServletHandlers.foreach(attachHandler)
    }
  }
}
EnvironmentPage {
  def render(request: HttpServletRequest): Seq[Node] = {
    val runtimeInformationTable = UIUtils.listingTable(
      propertyHeader, jvmRow, listener.jvmInformation, fixedWidth = true)
    val sparkPropertiesTable = UIUtils.listingTable(
      propertyHeader, propertyRow, listener.sparkProperties, fixedWidth = true)
    val systemPropertiesTable = UIUtils.listingTable(
      propertyHeader, propertyRow, listener.systemProperties, fixedWidth = true)
    val classpathEntriesTable = UIUtils.listingTable(
      classPathHeaders, classPathRow, listener.classpathEntries, fixedWidth = true)
    val content =
      <span>
        <h4>Runtime Information</h4> {runtimeInformationTable}
        <h4>Spark Properties</h4> {sparkPropertiesTable}
        <h4>System Properties</h4> {systemPropertiesTable}
        <h4>Classpath Entries</h4> {classpathEntriesTable}
      </span>

    // Returns a Spark page with the standard header layout
    UIUtils.headerSparkPage("Environment", content, parent)
  }

  private def propertyHeader = Seq("Name", "Value")
  private def classPathHeaders = Seq("Resource", "Source")
  private def jvmRow(kv: (String, String)) = <tr><td>{kv._1}</td><td>{kv._2}</td></tr>
  private def propertyRow(kv: (String, String)) = <tr><td>{kv._1}</td><td>{kv._2}</td></tr>
  private def classPathRow(data: (String, String)) = <tr><td>{data._1}</td><td>{data._2}</td></tr>
}
ExecutorsPage {
  // Pre-defined fields shown in the Executors page table
  private case class ExecutorSummaryInfo(
      id: String,
      hostPort: String,
      rddBlocks: Int,
      memoryUsed: Long,
      diskUsed: Long,
      activeTasks: Int,
      failedTasks: Int,
      completedTasks: Int,
      totalTasks: Int,
      totalDuration: Long,
      totalInputBytes: Long,
      totalShuffleRead: Long,
      totalShuffleWrite: Long,
      maxMemory: Long)

  def render(request: HttpServletRequest): Seq[Node] = {
    val content =
      <div class="row-fluid">
        <div class="span12">
          <ul class="unstyled">
            <li><strong>Memory:</strong>
              {Utils.bytesToString(memUsed)} Used ({Utils.bytesToString(maxMem)} Total)</li>
            <li><strong>Disk:</strong> {Utils.bytesToString(diskUsed)} Used</li>
          </ul>
        </div>
      </div>
      <div class="row">
        <div class="span12">
          {execTable}
        </div>
      </div>;

    UIUtils.headerSparkPage("Executors (" + execInfo.size + ")", content, parent)
  }
}
UIUtils {
  // Builds the HTML for the HistoryServer main page
  def basicSparkPage(content: => Seq[Node], title: String): Seq[Node] = {
    <html>
      <head>
        {commonHeaderNodes}
        <title>{title}</title>
      </head>
      <body>
        <div class="container-fluid">
          <div class="row-fluid">
            <div class="span12">
              <h3 style="vertical-align: middle; display: inline-block;">
                <a style="text-decoration: none" href={prependBaseUri("/")}>
                  <img src={prependBaseUri("/static/spark-logo-77x50px-hd.png")} style="margin-right: 15px;" />
                </a>
                {title}
              </h3>
            </div>
          </div>
          {content}
        </div>
      </body>
    </html>
  }

  // Loads the JS/CSS/jQuery resources used to render the pages
  def commonHeaderNodes = {
    <meta http-equiv="Content-type" content="text/html; charset=utf-8" />
    <link rel="stylesheet" href={prependBaseUri("/static/bootstrap.min.css")} type="text/css" />
    <script src={prependBaseUri("/static/jquery-1.11.1.min.js")}></script>
    ......
  }

  // Builds a table: headers are the column titles, generateDataRow turns each element into a row
  def listingTable[T](
      headers: Seq[String],
      generateDataRow: T => Seq[Node],
      data: Iterable[T],
      fixedWidth: Boolean = false): Seq[Node] = {
  }
}