真是时光荏苒啊,好长时间没再写 Kettle 的代码分析了。正好最近在排查 Kettle 的连接池泄漏问题,趁此机会再来好好聊一聊。
如果查看 trans 和 job 的执行,就会发现这两者的执行是有较大差别的。
Job 里有个比较灵活的用法,就是执行前后的监听器 JobListener。遍查 engine 的代码,其中以结束监听的使用居多。
/**
 * Thread entry point of the job: prepares the variable space and parameters, fires the start listeners, starts the
 * heartbeat and executes the job.
 * <p>
 * Any {@link Throwable} raised during preparation or execution is logged and replaced by a failed {@link Result}
 * carrying one error; it is not propagated to the caller.
 * <p>
 * The end-of-job clean-up steps are isolated from each other with nested {@code try/finally} blocks: the job finish
 * listeners are fired and unused VFS resources are released even when an earlier clean-up step (heartbeat shutdown,
 * the JobFinish extension point, metastore disposal) fails. Finish listeners are frequently what hands resources
 * such as pooled connections back, so skipping them leaks those resources. If several clean-up steps fail, the
 * exception of the last failing step is the one that is reported.
 */
@Override public void run() {
  ExecutorService heartbeat = null; // this job's heartbeat scheduled executor
  try {
    setStopped( false );
    setFinished( false );
    setInitialized( true );

    // Create a new variable name space as we want jobs to have their own set of variables.
    // initialize from parentJob or null
    //
    variables.initializeVariablesFrom( parentJob );
    setInternalKettleVariables( variables );
    copyParametersFrom( jobMeta );
    activateParameters();

    // Run the job
    //
    fireJobStartListeners();
    heartbeat = startHeartbeat( getHeartbeatIntervalInSeconds() );
    result = execute();
  } catch ( Throwable je ) {
    log.logError( BaseMessages.getString( PKG, "Job.Log.ErrorExecJob", je.getMessage() ), je );
    //
    // we don't have result object because execute() threw a curve-ball.
    // So we create a new error object.
    //
    result = new Result();
    result.setNrErrors( 1L );
    result.setResult( false );
    addErrors( 1 ); // This can be before actual execution

    emergencyWriteJobTracker( result );

    setActive( false );
    setFinished( true );
    setStopped( false );
  } finally {
    try {
      try {
        shutdownHeartbeat( heartbeat );
        ExtensionPointHandler.callExtensionPoint( log, KettleExtensionPoint.JobFinish.id, this );
        jobMeta.disposeEmbeddedMetastoreProvider();
      } finally {
        try {
          // Resource release: must run even if the steps above failed, otherwise whatever the
          // listeners are responsible for releasing (often pooled connections) is never returned.
          fireJobFinishListeners();
        } finally {
          // release unused vfs connections
          KettleVFS.freeUnusedResources();
        }
      }
    } catch ( KettleException e ) {
      result.setNrErrors( 1 );
      result.setResult( false );
      log.logError( BaseMessages.getString( PKG, "Job.Log.ErrorExecJob", e.getMessage() ), e );

      emergencyWriteJobTracker( result );
    }
  }
}
另一个核心方法:
任务执行的启动入口
/**
 * Runs the job from its entry point, without the result of a previous execution. This is the non-recursive starting
 * point; the job entries themselves are walked by the recursive {@code execute} overload.
 * <p>
 * The start and the end of the run are recorded in the job tracker, and the start/stop metrics snapshots are always
 * taken, the latter even when the execution fails.
 *
 * @return the result handed back by the recursive execution of the starting job entry
 *
 * @throws KettleException
 *           when no starting job entry can be determined or when the execution itself fails
 */
private Result execute() throws KettleException {
  try {
    log.snap( Metrics.METRIC_JOB_START );

    // Reset the life-cycle flags and publish the execution information.
    setFinished( false );
    setStopped( false );
    KettleEnvironment.setExecutionInformation( this, rep );

    log.logMinimal( BaseMessages.getString( PKG, "Job.Comment.JobStarted" ) );
    ExtensionPointHandler.callExtensionPoint( log, KettleExtensionPoint.JobStart.id, this );

    // Record the start of the job in the tracker.
    final String startedComment = BaseMessages.getString( PKG, "Job.Comment.JobStarted" );
    final String startedReason = BaseMessages.getString( PKG, "Job.Reason.Started" );
    final JobEntryResult startMarker =
      new JobEntryResult( null, null, startedComment, startedReason, null, 0, null );
    jobTracker.addJobTracker( new JobTracker( jobMeta, startMarker ) );
    setActive( true );

    // A job may be nested in a parent job; in that case the parent is the monitor to lock on.
    final Object lock = ( parentJob != null ) ? parentJob : this;
    synchronized ( lock ) {
      beginProcessing();
    }

    // Decide where to begin: an explicitly configured entry (with its result) or the special START entry.
    final JobEntryCopy firstEntry;
    Result outcome = null;
    if ( startJobEntryCopy != null ) {
      firstEntry = startJobEntryCopy;
      outcome = startJobEntryResult;
    } else {
      firstEntry = jobMeta.findJobEntry( JobMeta.STRING_SPECIAL_START, 0, false );
    }
    if ( firstEntry == null ) {
      throw new KettleJobException( BaseMessages.getString( PKG, "Job.Log.CounldNotFindStartingPoint" ) );
    }

    final String endChannelId;
    if ( firstEntry.isStart() ) {
      // The special Start entry may ask for repetition: run at least once, then again for as long as it
      // repeats and the job has not been stopped.
      final JobEntrySpecial special = (JobEntrySpecial) firstEntry.getEntry();
      for ( boolean firstPass = true; ( special.isRepeat() || firstPass ) && !isStopped(); firstPass = false ) {
        outcome = execute( 0, null, firstEntry, null, BaseMessages.getString( PKG, "Job.Reason.Started" ) );
      }
      endChannelId = special.getLogChannelId();
    } else {
      // Start from the configured entry, handing over the configured previous result.
      outcome = execute( 0, outcome, firstEntry, null, BaseMessages.getString( PKG, "Job.Reason.Started" ) );
      endChannelId = firstEntry.getEntry().getLogChannel().getLogChannelId();
    }

    // Record the end of the job in the tracker.
    final JobEntryResult endMarker =
      new JobEntryResult( outcome, endChannelId, BaseMessages.getString( PKG, "Job.Comment.JobFinished" ),
        BaseMessages.getString( PKG, "Job.Reason.Finished" ), null, 0, null );
    jobTracker.addJobTracker( new JobTracker( jobMeta, endMarker ) );
    log.logMinimal( BaseMessages.getString( PKG, "Job.Comment.JobFinished" ) );

    setActive( false );
    setFinished( true );
    return outcome;
  } finally {
    log.snap( Metrics.METRIC_JOB_STOP );
  }
}
下面就是最核心也是又臭又长的方法:
/**
 * Execute a job entry recursively and move to the next job entry automatically.
 * Uses a back-tracking algorithm.
 * <p>
 * The entry is cloned and executed, the outcome is recorded in the job tracker and in the list of job entry
 * results, and then every outgoing hop that is unconditional or matches the outcome is followed by calling this
 * method again, either one after the other on the calling thread or, when the entry launches in parallel, each in
 * its own thread that is joined before returning.
 * <p>
 * NOTE(review): this excerpt elides part of the original method (see the {@code ...} marker in the body), so the
 * description above covers only the code that is visible here.
 *
 * @param nr
 *          number handed to the stopped {@link Result} and to the entry's own execute call; recursive calls pass
 *          {@code nr + 1}
 * @param prev_result
 *          result of the preceding entry; it is cloned before use, and a fresh {@link Result} is used when it is
 *          {@code null}
 * @param jobEntryCopy
 *          the job entry to execute now
 * @param previous
 *          the entry that led here (recursive calls pass the current entry); not read in the visible part of the
 *          method
 * @param reason
 *          text stored as the reason in the tracker record written before the entry is executed
 * @return a {@link Result} flagged as stopped when the job is already stopped; otherwise the result of the last
 *         sequentially executed next entry, or the (cloned) previous result when no next entry ran sequentially,
 *         with the results of parallel executions added and the result flag cleared when errors were counted
 * @throws KettleException
 *           wrapping anything thrown by a sequentially executed next entry, or the first exception collected from
 *           the parallel threads
 */
private Result execute( final int nr, Result prev_result, final JobEntryCopy jobEntryCopy, JobEntryCopy previous,
String reason ) throws KettleException {
Result res = null;
// Preliminary check: when the job is already stopped, return a result flagged as stopped without executing.
if ( isStopped() ) {
res = new Result( nr );
res.stopped = true;
return res;
}
// if we didn't have a previous result, create one, otherwise, copy the content...
//
final Result newResult;
Result prevResult = null;
if ( prev_result != null ) {
prevResult = prev_result.clone();
} else {
prevResult = new Result();
}
// NOTE(review): the excerpt elides code at the marker below. The 'extension' variable used further down and the
// block closed by the otherwise unmatched brace before the JobAfterJobEntryExecution call are presumably
// declared/opened in the elided part - confirm against the full source.
...
// Which entry is next?
JobEntryInterface jobEntryInterface = jobEntryCopy.getEntry();
jobEntryInterface.getLogChannel().setLogLevel( logLevel );
// Track the fact that we are going to launch the next job entry...
JobEntryResult jerBefore =
new JobEntryResult( null, null, BaseMessages.getString( PKG, "Job.Comment.JobStarted" ), reason, jobEntryCopy
.getName(), jobEntryCopy.getNr(), environmentSubstitute( jobEntryCopy.getEntry().getFilename() ) );
jobTracker.addJobTracker( new JobTracker( jobMeta, jerBefore ) );
// Remember the current context class loader and switch to the one that loaded the job entry's class;
// the remembered loader is put back after the after-execution listeners have run.
// NOTE(review): no try/finally is visible around this switch, so an exception thrown in between appears to
// leave the entry's class loader installed on the thread - confirm against the full source.
ClassLoader cl = Thread.currentThread().getContextClassLoader();
Thread.currentThread().setContextClassLoader( jobEntryInterface.getClass().getClassLoader() );
// Execute this entry...
// The entry is not executed directly: a clone receives this job's variables, repository, metastore and
// parent job references, and the clone is what gets executed.
JobEntryInterface cloneJei = (JobEntryInterface) jobEntryInterface.clone();
( (VariableSpace) cloneJei ).copyVariablesFrom( this );
cloneJei.setRepository( rep );
if ( rep != null ) {
cloneJei.setMetaStore( rep.getMetaStore() );
}
cloneJei.setParentJob( this );
cloneJei.setParentJobMeta( this.getJobMeta() );
final long start = System.currentTimeMillis();
cloneJei.getLogChannel().logDetailed( "Starting job entry" );
// Notify the job entry listeners before the entry runs.
for ( JobEntryListener jobEntryListener : jobEntryListeners ) {
jobEntryListener.beforeExecution( this, jobEntryCopy, cloneJei );
}
// In interactive mode, register the running transformation/job entry while it executes.
if ( interactive ) {
if ( jobEntryCopy.isTransformation() ) {
getActiveJobEntryTransformations().put( jobEntryCopy, (JobEntryTrans) cloneJei );
}
if ( jobEntryCopy.isJob() ) {
getActiveJobEntryJobs().put( jobEntryCopy, (JobEntryJob) cloneJei );
}
}
// The actual execution of the entry, bracketed by metrics snapshots.
log.snap( Metrics.METRIC_JOBENTRY_START, cloneJei.toString() );
newResult = cloneJei.execute( prevResult, nr );
log.snap( Metrics.METRIC_JOBENTRY_STOP, cloneJei.toString() );
final long end = System.currentTimeMillis();
// In interactive mode, unregister the entry again now that it has finished.
if ( interactive ) {
if ( jobEntryCopy.isTransformation() ) {
getActiveJobEntryTransformations().remove( jobEntryCopy );
}
if ( jobEntryCopy.isJob() ) {
getActiveJobEntryJobs().remove( jobEntryCopy );
}
}
// For transformation entries, log the throughput text computed from the elapsed whole seconds, if there is one.
if ( cloneJei instanceof JobEntryTrans ) {
String throughput = newResult.getReadWriteThroughput( (int) ( ( end - start ) / 1000 ) );
if ( throughput != null ) {
log.logMinimal( throughput );
}
}
// Notify the job entry listeners after the entry has run.
for ( JobEntryListener jobEntryListener : jobEntryListeners ) {
jobEntryListener.afterExecution( this, jobEntryCopy, cloneJei, newResult );
}
// Put the remembered context class loader back and add the entry's error count to the job.
Thread.currentThread().setContextClassLoader( cl );
addErrors( (int) newResult.getNrErrors() );
// Also capture the logging text after the execution...
//
LoggingBuffer loggingBuffer = KettleLogStore.getAppender();
StringBuffer logTextBuffer = loggingBuffer.getBuffer( cloneJei.getLogChannel().getLogChannelId(), false );
newResult.setLogText( logTextBuffer.toString() + newResult.getLogText() );
// Save this result as well...
//
JobEntryResult jerAfter =
new JobEntryResult( newResult, cloneJei.getLogChannel().getLogChannelId(), BaseMessages.getString( PKG,
"Job.Comment.JobFinished" ), null, jobEntryCopy.getName(), jobEntryCopy.getNr(), environmentSubstitute(
jobEntryCopy.getEntry().getFilename() ) );
jobTracker.addJobTracker( new JobTracker( jobMeta, jerAfter ) );
synchronized ( jobEntryResults ) {
jobEntryResults.add( jerAfter );
// Only keep the last X job entry results in memory
//
if ( maxJobEntriesLogged > 0 ) {
while ( jobEntryResults.size() > maxJobEntriesLogged ) {
// Remove the oldest.
jobEntryResults.removeFirst();
}
}
}
}
// Call the JobAfterJobEntryExecution extension point with a new extension object built from the (cloned)
// previous result and the executeEntry value of the earlier extension object.
extension = new JobExecutionExtension( this, prevResult, jobEntryCopy, extension.executeEntry );
ExtensionPointHandler.callExtensionPoint( log, KettleExtensionPoint.JobAfterJobEntryExecution.id, extension );
// Try all next job entries.
//
// Keep track of all the threads we fired in case of parallel execution...
// Keep track of the results of these executions too.
//
final List<Thread> threads = new ArrayList<Thread>();
// next 2 lists is being modified concurrently so must be synchronized for this case.
final Queue<Result> threadResults = new ConcurrentLinkedQueue<Result>();
final Queue<KettleException> threadExceptions = new ConcurrentLinkedQueue<KettleException>();
final List<JobEntryCopy> threadEntries = new ArrayList<JobEntryCopy>();
// Launch only those where the hop indicates true or false
//
int nrNext = jobMeta.findNrNextJobEntries( jobEntryCopy );
for ( int i = 0; i < nrNext && !isStopped(); i++ ) {
// The next entry is...
final JobEntryCopy nextEntry = jobMeta.findNextJobEntry( jobEntryCopy, i );
// See if we need to execute this...
final JobHopMeta hi = jobMeta.findJobHop( jobEntryCopy, nextEntry );
// The next comment...
final String nextComment;
if ( hi.isUnconditional() ) {
nextComment = BaseMessages.getString( PKG, "Job.Comment.FollowedUnconditional" );
} else {
if ( newResult.getResult() ) {
nextComment = BaseMessages.getString( PKG, "Job.Comment.FollowedSuccess" );
} else {
nextComment = BaseMessages.getString( PKG, "Job.Comment.FollowedFailure" );
}
}
//
// If the link is unconditional, execute the next job entry (entries).
// If the start point was an evaluation and the link color is correct:
// green or red, execute the next job entry...
//
// The negated XOR is true exactly when the hop's evaluation flag equals the outcome of this entry.
if ( hi.isUnconditional() || ( jobEntryCopy.evaluates() && ( !( hi.getEvaluation() ^ newResult
.getResult() ) ) ) ) {
// Start this next step!
if ( log.isBasic() ) {
log.logBasic( BaseMessages.getString( PKG, "Job.Log.StartingEntry", nextEntry.getName() ) );
}
// Pass along the previous result, perhaps the next job can use it...
// However, set the number of errors back to 0 (if it should be reset)
// When an evaluation is executed the errors e.g. should not be reset.
if ( nextEntry.resetErrorsBeforeExecution() ) {
newResult.setNrErrors( 0 );
}
// Now execute!
//
// if (we launch in parallel, fire the execution off in a new thread...
//
if ( jobEntryCopy.isLaunchingInParallel() ) {
threadEntries.add( nextEntry );
Runnable runnable = new Runnable() {
@Override public void run() {
try {
Result threadResult = execute( nr + 1, newResult, nextEntry, jobEntryCopy, nextComment );
threadResults.add( threadResult );
} catch ( Throwable e ) {
// A failure in a parallel branch is collected rather than thrown from the thread: the exception is
// queued and a failed result with one error stands in for the branch.
log.logError( Const.getStackTracker( e ) );
threadExceptions.add( new KettleException( BaseMessages.getString( PKG, "Job.Log.UnexpectedError",
nextEntry.toString() ), e ) );
Result threadResult = new Result();
threadResult.setResult( false );
threadResult.setNrErrors( 1L );
threadResults.add( threadResult );
}
}
};
Thread thread = new Thread( runnable );
threads.add( thread );
thread.start();
if ( log.isBasic() ) {
log.logBasic( BaseMessages.getString( PKG, "Job.Log.LaunchedJobEntryInParallel", nextEntry.getName() ) );
}
} else {
try {
// Same as before: blocks until it's done
//
res = execute( nr + 1, newResult, nextEntry, jobEntryCopy, nextComment );
} catch ( Throwable e ) {
// A failure in a sequential branch is logged and rethrown wrapped in a KettleException.
log.logError( Const.getStackTracker( e ) );
throw new KettleException( BaseMessages.getString( PKG, "Job.Log.UnexpectedError", nextEntry.toString() ),
e );
}
if ( log.isBasic() ) {
log.logBasic( BaseMessages.getString( PKG, "Job.Log.FinishedJobEntry", nextEntry.getName(), res.getResult()
+ "" ) );
}
}
}
}
// OK, if we run in parallel, we need to wait for all the job entries to
// finish...
//
if ( jobEntryCopy.isLaunchingInParallel() ) {
for ( int i = 0; i < threads.size(); i++ ) {
Thread thread = threads.get( i );
JobEntryCopy nextEntry = threadEntries.get( i );
try {
thread.join();
} catch ( InterruptedException e ) {
// NOTE(review): the interrupt is recorded as a thread exception but the interrupt flag of the current
// thread is not set again here.
log.logError( jobMeta.toString(), BaseMessages.getString( PKG,
"Job.Log.UnexpectedErrorWhileWaitingForJobEntry", nextEntry.getName() ) );
threadExceptions.add( new KettleException( BaseMessages.getString( PKG,
"Job.Log.UnexpectedErrorWhileWaitingForJobEntry", nextEntry.getName() ), e ) );
}
}
// if(log.isBasic()) log.logBasic(BaseMessages.getString(PKG,
// "Job.Log.FinishedJobEntry",startpoint.getName(),res.getResult()+""));
}
// Perhaps we don't have next steps??
// In this case, return the previous result.
// (res is only assigned by the sequential branch above, so it is also still null after parallel launches.)
if ( res == null ) {
res = prevResult;
}
// See if there where any errors in the parallel execution
//
if ( threadExceptions.size() > 0 ) {
res.setResult( false );
res.setNrErrors( threadExceptions.size() );
for ( KettleException e : threadExceptions ) {
log.logError( jobMeta.toString(), e.getMessage(), e );
}
// Now throw the first Exception for good measure...
//
throw threadExceptions.poll();
}
// In parallel execution, we aggregate all the results, simply add them to
// the previous result...
//
for ( Result threadResult : threadResults ) {
res.add( threadResult );
}
// If there have been errors, logically, we need to set the result to
// "false"...
//
if ( res.getNrErrors() > 0 ) {
res.setResult( false );
}
return res;
}