Kettle Job Execution Flow Analysis (2)

Time flies; it has been quite a while since I last wrote a Kettle code analysis. I happen to be troubleshooting a connection pool leak in Kettle, so this is a good opportunity to dig back into the code.

If you look at how a trans and a job are executed, you will find the two differ considerably:

  • In a trans, every execution component (step) runs as its own thread.
  • A job itself runs as a single thread (though, as we will see, it can spawn threads for entries launched in parallel).

One flexible mechanism in jobs is the pair of before/after hooks, JobListener. Searching through the engine code, the finish listener is used far more often than the start listener.
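For context, here is a minimal sketch of such a listener. The interface is org.pentaho.di.job.JobListener; the class name and log messages below are mine, not from the Kettle source:

import org.pentaho.di.core.exception.KettleException;
import org.pentaho.di.job.Job;
import org.pentaho.di.job.JobListener;

// A minimal JobListener sketch: hook the start and finish of a job.
// In practice the finish hook is where embedders release external resources.
public class LoggingJobListener implements JobListener {

  @Override
  public void jobStarted( Job job ) throws KettleException {
    job.getLogChannel().logBasic( "job started: " + job.getJobname() );
  }

  @Override
  public void jobFinished( Job job ) throws KettleException {
    // Cleanup would go here; Kettle's own finish handling is where
    // pooled database connections commonly get released.
    job.getLogChannel().logBasic( "job finished: " + job.getJobname() );
  }
}

Register it with job.addJobListener( new LoggingJobListener() ) before the job starts. In the run() method below you can see exactly where fireJobStartListeners() and fireJobFinishListeners() invoke these hooks: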

  @Override
  public void run() {

    ExecutorService heartbeat = null; // this job's heartbeat scheduled executor

    try {
      setStopped( false );
      setFinished( false );
      setInitialized( true );

      // Create a new variable name space as we want jobs to have their own set of variables.
      // initialize from parentJob or null
      //
      variables.initializeVariablesFrom( parentJob );
      setInternalKettleVariables( variables );
      copyParametersFrom( jobMeta );
      activateParameters();

      // Run the job
      //
      fireJobStartListeners(); // notify registered JobListeners that the job is starting

      heartbeat = startHeartbeat( getHeartbeatIntervalInSeconds() );

      result = execute();
    } catch ( Throwable je ) {
      log.logError( BaseMessages.getString( PKG, "Job.Log.ErrorExecJob", je.getMessage() ), je );
      // log.logError(Const.getStackTracker(je));
      //
      // we don't have result object because execute() threw a curve-ball.
      // So we create a new error object.
      //
      result = new Result();
      result.setNrErrors( 1L );
      result.setResult( false );
      addErrors( 1 ); // This can be before actual execution

      emergencyWriteJobTracker( result );

      setActive( false );
      setFinished( true );
      setStopped( false );
    } finally {
      try {
        shutdownHeartbeat( heartbeat );

        ExtensionPointHandler.callExtensionPoint( log, KettleExtensionPoint.JobFinish.id, this );
        jobMeta.disposeEmbeddedMetastoreProvider();
        // Resource cleanup: in many cases this is where database connection pools release their connections.
        fireJobFinishListeners();

        // release unused vfs connections
        KettleVFS.freeUnusedResources();

      } catch ( KettleException e ) {
        result.setNrErrors( 1 );
        result.setResult( false );
        log.logError( BaseMessages.getString( PKG, "Job.Log.ErrorExecJob", e.getMessage() ), e );

        emergencyWriteJobTracker( result );
      }
    }
  }
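To see this lifecycle from the outside, here is a minimal embedding sketch (the .kjb path is a placeholder). Since Job extends Thread, calling start() eventually enters the run() method above:

import org.pentaho.di.core.KettleEnvironment;
import org.pentaho.di.job.Job;
import org.pentaho.di.job.JobMeta;

public class RunJobExample {
  public static void main( String[] args ) throws Exception {
    KettleEnvironment.init(); // load plugins, variables, VFS, ...

    // "sample.kjb" is a placeholder; the null arguments mean "no repository".
    JobMeta jobMeta = new JobMeta( "sample.kjb", null );
    Job job = new Job( null, jobMeta );

    job.start();             // enters Job.run() shown above
    job.waitUntilFinished(); // returns after the finish listeners have run

    System.out.println( "errors: " + job.getResult().getNrErrors() );
  }
}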

The other core method is execute(), the entry point where the job launch actually begins:

/**
 * Execute a job without previous results. This is a job entry point (not recursive).
 *
 * @return the result of the execution
 * @throws KettleException
 */
private Result execute() throws KettleException {
  try {
    log.snap( Metrics.METRIC_JOB_START );

    // Set up the execution environment
    setFinished( false );
    setStopped( false );
    KettleEnvironment.setExecutionInformation( this, rep );

    log.logMinimal( BaseMessages.getString( PKG, "Job.Comment.JobStarted" ) );

    ExtensionPointHandler.callExtensionPoint( log, KettleExtensionPoint.JobStart.id, this );

    // Start the tracking...
    JobEntryResult jerStart =
      new JobEntryResult( null, null, BaseMessages.getString( PKG, "Job.Comment.JobStarted" ), BaseMessages
        .getString( PKG, "Job.Reason.Started" ), null, 0, null );
    jobTracker.addJobTracker( new JobTracker( jobMeta, jerStart ) );

    setActive( true );

    // Where do we start?
    JobEntryCopy startpoint;

    // Synchronize this to the parent job if needed.
    // Jobs are hierarchical: a job can contain sub-jobs.
    Object syncObject = this;
    if ( parentJob != null ) {
      syncObject = parentJob; // parallel execution in a job
    }
    synchronized ( syncObject ) {
      beginProcessing();
    }

    Result res = null;

    // The starting entry can be chosen explicitly via startJobEntryCopy
    if ( startJobEntryCopy == null ) {
      startpoint = jobMeta.findJobEntry( JobMeta.STRING_SPECIAL_START, 0, false );
    } else {
      startpoint = startJobEntryCopy;
      res = startJobEntryResult;
    }
    if ( startpoint == null ) {
      throw new KettleJobException( BaseMessages.getString( PKG, "Job.Log.CounldNotFindStartingPoint" ) );
    }

    JobEntryResult jerEnd = null;

    if ( startpoint.isStart() ) {
      // Perform optional looping in the special Start job entry...
      //
      // long iteration = 0;

      boolean isFirst = true;
      JobEntrySpecial jes = (JobEntrySpecial) startpoint.getEntry();
      while ( ( jes.isRepeat() || isFirst ) && !isStopped() ) {
        isFirst = false;
        res = execute( 0, null, startpoint, null, BaseMessages.getString( PKG, "Job.Reason.Started" ) );

        //
        // if (iteration > 0 && (iteration % 500) == 0) {
        // System.out.println("other 500 iterations: " + iteration);
        // }

        // iteration++;
        //
      }
      jerEnd =
        new JobEntryResult( res, jes.getLogChannelId(), BaseMessages.getString( PKG, "Job.Comment.JobFinished" ),
          BaseMessages.getString( PKG, "Job.Reason.Finished" ), null, 0, null );
    } else {
      // The core of the launch: hand off to the recursive execute(). Very important.
      res = execute( 0, res, startpoint, null, BaseMessages.getString( PKG, "Job.Reason.Started" ) );
      jerEnd =
        new JobEntryResult( res, startpoint.getEntry().getLogChannel().getLogChannelId(), BaseMessages.getString(
          PKG, "Job.Comment.JobFinished" ), BaseMessages.getString( PKG, "Job.Reason.Finished" ), null, 0, null );
    }

    // Save this result...
    jobTracker.addJobTracker( new JobTracker( jobMeta, jerEnd ) );
    log.logMinimal( BaseMessages.getString( PKG, "Job.Comment.JobFinished" ) );

    setActive( false );
    setFinished( true );

    return res;
  } finally {
    log.snap( Metrics.METRIC_JOB_STOP );
  }
}
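The startJobEntryCopy branch above means a caller can launch the job from an arbitrary entry instead of the special START entry. A hedged sketch, reusing the setup from the earlier embedding example (the entry name "load data" is made up):

import org.pentaho.di.job.Job;
import org.pentaho.di.job.JobMeta;
import org.pentaho.di.job.entry.JobEntryCopy;

public class StartFromEntryExample {
  // Launch a job from a named entry rather than from START.
  // "load data" is a hypothetical entry name; 0 is the copy number (nr).
  static void runFrom( Job job, JobMeta jobMeta ) {
    JobEntryCopy startCopy = jobMeta.findJobEntry( "load data", 0, false );
    if ( startCopy != null ) {
      job.setStartJobEntryCopy( startCopy ); // execute() then skips the START lookup
    }
    job.start();
    job.waitUntilFinished();
  }
}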

Next up is the most important method, which is also notoriously long and messy: the recursive execute().

/**
 * Execute a job entry recursively and move to the next job entry automatically.
 * Uses a back-tracking algorithm.
 *
 * @param nr
 * @param prev_result
 * @param jobEntryCopy
 * @param previous
 * @param reason
 * @return
 * @throws KettleException
 */
private Result execute( final int nr, Result prev_result, final JobEntryCopy jobEntryCopy, JobEntryCopy previous,
  String reason ) throws KettleException {
  Result res = null;

  // Preliminary checks
  if ( isStopped() ) {
    res = new Result( nr );
    res.stopped = true;
    return res;
  }

  // if we didn't have a previous result, create one, otherwise, copy the content...
  //
  final Result newResult;
  Result prevResult = null;
  if ( prev_result != null ) {
    prevResult = prev_result.clone();
  } else {
    prevResult = new Result();
  }

  ... // (a portion of the method is elided here)

  // Which entry is next?
  JobEntryInterface jobEntryInterface = jobEntryCopy.getEntry();
  jobEntryInterface.getLogChannel().setLogLevel( logLevel );

  // Track the fact that we are going to launch the next job entry...
  JobEntryResult jerBefore =
    new JobEntryResult( null, null, BaseMessages.getString( PKG, "Job.Comment.JobStarted" ), reason, jobEntryCopy
      .getName(), jobEntryCopy.getNr(), environmentSubstitute( jobEntryCopy.getEntry().getFilename() ) );
  jobTracker.addJobTracker( new JobTracker( jobMeta, jerBefore ) );

  ClassLoader cl = Thread.currentThread().getContextClassLoader();
  Thread.currentThread().setContextClassLoader( jobEntryInterface.getClass().getClassLoader() );

  // Execute this entry...
  JobEntryInterface cloneJei = (JobEntryInterface) jobEntryInterface.clone();
  ( (VariableSpace) cloneJei ).copyVariablesFrom( this );
  cloneJei.setRepository( rep );
  if ( rep != null ) {
    cloneJei.setMetaStore( rep.getMetaStore() );
  }
  cloneJei.setParentJob( this );
  cloneJei.setParentJobMeta( this.getJobMeta() );
  final long start = System.currentTimeMillis();

  cloneJei.getLogChannel().logDetailed( "Starting job entry" );
  for ( JobEntryListener jobEntryListener : jobEntryListeners ) {
    jobEntryListener.beforeExecution( this, jobEntryCopy, cloneJei );
  }
  if ( interactive ) {
    if ( jobEntryCopy.isTransformation() ) {
      getActiveJobEntryTransformations().put( jobEntryCopy, (JobEntryTrans) cloneJei );
    }
    if ( jobEntryCopy.isJob() ) {
      getActiveJobEntryJobs().put( jobEntryCopy, (JobEntryJob) cloneJei );
    }
  }
  log.snap( Metrics.METRIC_JOBENTRY_START, cloneJei.toString() );
  newResult = cloneJei.execute( prevResult, nr );
  log.snap( Metrics.METRIC_JOBENTRY_STOP, cloneJei.toString() );
  final long end = System.currentTimeMillis();

  if ( interactive ) {
    if ( jobEntryCopy.isTransformation() ) {
      getActiveJobEntryTransformations().remove( jobEntryCopy );
    }
    if ( jobEntryCopy.isJob() ) {
      getActiveJobEntryJobs().remove( jobEntryCopy );
    }
  }

  if ( cloneJei instanceof JobEntryTrans ) {
    String throughput = newResult.getReadWriteThroughput( (int) ( ( end - start ) / 1000 ) );
    if ( throughput != null ) {
      log.logMinimal( throughput );
    }
  }
  for ( JobEntryListener jobEntryListener : jobEntryListeners ) {
    jobEntryListener.afterExecution( this, jobEntryCopy, cloneJei, newResult );
  }

  Thread.currentThread().setContextClassLoader( cl );
  addErrors( (int) newResult.getNrErrors() );

  // Also capture the logging text after the execution...
  //
  LoggingBuffer loggingBuffer = KettleLogStore.getAppender();
  StringBuffer logTextBuffer = loggingBuffer.getBuffer( cloneJei.getLogChannel().getLogChannelId(), false );
  newResult.setLogText( logTextBuffer.toString() + newResult.getLogText() );

  // Save this result as well...
  //
  JobEntryResult jerAfter =
    new JobEntryResult( newResult, cloneJei.getLogChannel().getLogChannelId(), BaseMessages.getString( PKG,
      "Job.Comment.JobFinished" ), null, jobEntryCopy.getName(), jobEntryCopy.getNr(), environmentSubstitute(
      jobEntryCopy.getEntry().getFilename() ) );
  jobTracker.addJobTracker( new JobTracker( jobMeta, jerAfter ) );
  synchronized ( jobEntryResults ) {
    jobEntryResults.add( jerAfter );

    // Only keep the last X job entry results in memory
    //
    if ( maxJobEntriesLogged > 0 ) {
      while ( jobEntryResults.size() > maxJobEntriesLogged ) {
        // Remove the oldest.
        jobEntryResults.removeFirst();
      }
    }
  }
} // closes a block opened in the code elided above (the extension-point check)

extension = new JobExecutionExtension( this, prevResult, jobEntryCopy, extension.executeEntry );
ExtensionPointHandler.callExtensionPoint( log, KettleExtensionPoint.JobAfterJobEntryExecution.id, extension );

  // Try all next job entries.
  //
  // Keep track of all the threads we fired in case of parallel execution...
  // Keep track of the results of these executions too.
  //
  final List<Thread> threads = new ArrayList<Thread>();
  // The next 2 lists are modified concurrently, so they must be thread-safe.
  final Queue<Result> threadResults = new ConcurrentLinkedQueue<Result>();
  final Queue<KettleException> threadExceptions = new ConcurrentLinkedQueue<KettleException>();
  final List<JobEntryCopy> threadEntries = new ArrayList<JobEntryCopy>();

  // Launch only those where the hop indicates true or false
  //
  int nrNext = jobMeta.findNrNextJobEntries( jobEntryCopy );
  for ( int i = 0; i < nrNext && !isStopped(); i++ ) {
    // The next entry is...
    final JobEntryCopy nextEntry = jobMeta.findNextJobEntry( jobEntryCopy, i );

    // See if we need to execute this...
    final JobHopMeta hi = jobMeta.findJobHop( jobEntryCopy, nextEntry );

    // The next comment...
    final String nextComment;
    if ( hi.isUnconditional() ) {
      nextComment = BaseMessages.getString( PKG, "Job.Comment.FollowedUnconditional" );
    } else {
      if ( newResult.getResult() ) {
        nextComment = BaseMessages.getString( PKG, "Job.Comment.FollowedSuccess" );
      } else {
        nextComment = BaseMessages.getString( PKG, "Job.Comment.FollowedFailure" );
      }
    }

    //
    // If the link is unconditional, execute the next job entry (entries).
    // If the start point was an evaluation and the link color is correct:
    // green or red, execute the next job entry...
    //
    if ( hi.isUnconditional() || ( jobEntryCopy.evaluates() && ( !( hi.getEvaluation() ^ newResult.getResult() ) ) ) ) {
      // Start this next step!
      if ( log.isBasic() ) {
        log.logBasic( BaseMessages.getString( PKG, "Job.Log.StartingEntry", nextEntry.getName() ) );
      }

      // Pass along the previous result, perhaps the next job can use it...
      // However, set the number of errors back to 0 (if it should be reset).
      // When an evaluation is executed the errors should not be reset, for example.
      if ( nextEntry.resetErrorsBeforeExecution() ) {
        newResult.setNrErrors( 0 );
      }

      // Now execute!
      //
      // If we launch in parallel, fire the execution off in a new thread...
      //
      if ( jobEntryCopy.isLaunchingInParallel() ) {
        threadEntries.add( nextEntry );

        Runnable runnable = new Runnable() {
          @Override
          public void run() {
            try {
              Result threadResult = execute( nr + 1, newResult, nextEntry, jobEntryCopy, nextComment );
              threadResults.add( threadResult );
            } catch ( Throwable e ) {
              log.logError( Const.getStackTracker( e ) );
              threadExceptions.add( new KettleException( BaseMessages.getString( PKG, "Job.Log.UnexpectedError",
                nextEntry.toString() ), e ) );
              Result threadResult = new Result();
              threadResult.setResult( false );
              threadResult.setNrErrors( 1L );
              threadResults.add( threadResult );
            }
          }
        };
        Thread thread = new Thread( runnable );
        threads.add( thread );
        thread.start();
        if ( log.isBasic() ) {
          log.logBasic( BaseMessages.getString( PKG, "Job.Log.LaunchedJobEntryInParallel", nextEntry.getName() ) );
        }
      } else {
        try {
          // Same as before: blocks until it's done
          //
          res = execute( nr + 1, newResult, nextEntry, jobEntryCopy, nextComment );
        } catch ( Throwable e ) {
          log.logError( Const.getStackTracker( e ) );
          throw new KettleException( BaseMessages.getString( PKG, "Job.Log.UnexpectedError", nextEntry.toString() ), e );
        }
        if ( log.isBasic() ) {
          log.logBasic( BaseMessages.getString( PKG, "Job.Log.FinishedJobEntry", nextEntry.getName(),
            res.getResult() + "" ) );
        }
      }
    }
  }

  // OK, if we run in parallel, we need to wait for all the job entries to
  // finish...
  //
  if ( jobEntryCopy.isLaunchingInParallel() ) {
    for ( int i = 0; i < threads.size(); i++ ) {
      Thread thread = threads.get( i );
      JobEntryCopy nextEntry = threadEntries.get( i );

      try {
        thread.join();
      } catch ( InterruptedException e ) {
        log.logError( jobMeta.toString(), BaseMessages.getString( PKG,
          "Job.Log.UnexpectedErrorWhileWaitingForJobEntry", nextEntry.getName() ) );
        threadExceptions.add( new KettleException( BaseMessages.getString( PKG,
          "Job.Log.UnexpectedErrorWhileWaitingForJobEntry", nextEntry.getName() ), e ) );
      }
    }
    // if(log.isBasic()) log.logBasic(BaseMessages.getString(PKG,
    // "Job.Log.FinishedJobEntry",startpoint.getName(),res.getResult()+""));
  }

  // Perhaps we don't have next steps??
  // In this case, return the previous result.
  if ( res == null ) {
    res = prevResult;
  }

  // See if there were any errors in the parallel execution
  //
  if ( threadExceptions.size() > 0 ) {
    res.setResult( false );
    res.setNrErrors( threadExceptions.size() );

    for ( KettleException e : threadExceptions ) {
      log.logError( jobMeta.toString(), e.getMessage(), e );
    }

    // Now throw the first Exception for good measure...
    //
    throw threadExceptions.poll();
  }

  // In parallel execution, we aggregate all the results, simply add them to
  // the previous result...
  //
  for ( Result threadResult : threadResults ) {
    res.add( threadResult );
  }

  // If there have been errors, logically, we need to set the result to
  // "false"...
  //
  if ( res.getNrErrors() > 0 ) {
    res.setResult( false );
  }

  return res;
}
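One detail in this method that is easy to misread is the hop-following condition hi.isUnconditional() || ( jobEntryCopy.evaluates() && !( hi.getEvaluation() ^ newResult.getResult() ) ). Because !(a ^ b) is just a == b, it says: follow the hop if it is unconditional, or if the hop's color matches the entry's outcome (a green hop is followed on success, a red one on failure). A standalone sketch of the rule, with names of my own choosing:

// Standalone illustration of the hop-following rule from execute():
// follow the hop if it is unconditional, or if the hop's evaluation
// matches the entry's result (!(evaluation ^ result) == (evaluation == result)).
public class HopRule {
  static boolean follows( boolean unconditional, boolean evaluates,
                          boolean hopEvaluation, boolean entryResult ) {
    return unconditional || ( evaluates && !( hopEvaluation ^ entryResult ) );
  }

  public static void main( String[] args ) {
    // green hop (evaluation=true) after a successful entry: followed
    System.out.println( follows( false, true, true, true ) );   // true
    // green hop after a failed entry: not followed
    System.out.println( follows( false, true, true, false ) );  // false
    // red hop (evaluation=false) after a failed entry: followed
    System.out.println( follows( false, true, false, false ) ); // true
    // unconditional hop: always followed
    System.out.println( follows( true, false, true, false ) );  // true
  }
}

In other words, the recursive walk only descends along hops whose condition matches the result just produced; every other branch is pruned.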
