A Sink takes events from its Channel and delivers them to a destination. The classes below are Flume's built-in sink implementations; each one's process() method follows the same transaction pattern.
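Before looking at the concrete sinks, here is a minimal sketch of the pattern they all share: take events from the channel inside a transaction, deliver them, commit on success, roll back on failure, and always close the transaction. The SkeletonSink class and its deliver() method are hypothetical, for illustration only.

import org.apache.flume.Channel;
import org.apache.flume.Event;
import org.apache.flume.EventDeliveryException;
import org.apache.flume.Transaction;
import org.apache.flume.sink.AbstractSink;

public class SkeletonSink extends AbstractSink {

  @Override
  public Status process() throws EventDeliveryException {
    Status status = Status.READY;
    Channel channel = getChannel();
    Transaction txn = channel.getTransaction();
    try {
      txn.begin();
      Event event = channel.take();
      if (event == null) {
        // channel was empty: ask the SinkRunner to back off for a while
        status = Status.BACKOFF;
      } else {
        deliver(event); // placeholder for writing to the real destination
      }
      txn.commit();
    } catch (Exception e) {
      txn.rollback();
      throw new EventDeliveryException("Failed to deliver event", e);
    } finally {
      txn.close();
    }
    return status;
  }

  private void deliver(Event event) {
    // hypothetical delivery logic (HDFS, HBase, RPC, local file, ...)
  }
}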
/**
 * A simple sink which reads events from a channel and writes them to HBase.
 * This sink uses an asynchronous API internally and is likely to perform better.
 * The HBase configuration is picked up from the first <tt>hbase-site.xml</tt>
 * encountered in the classpath. This sink supports batch reading of events
 * from the channel, and writing them to HBase, to minimize the number of
 * flushes on the HBase tables. To use this sink, it has to be configured
 * with certain mandatory parameters.<p>
 */
public class AsyncHBaseSink extends AbstractSink implements Configurable {

  @Override
  public Status process() throws EventDeliveryException {
    /*
     * Reference to the boolean representing failure of the current transaction.
     * Since each txn gets a new boolean, failure of one txn will not affect
     * the next even if errbacks for the current txn get called while
     * the next one is being processed.
     */
    if (!open) {
      throw new EventDeliveryException("Sink was never opened. "
          + "Please fix the configuration.");
    }
    AtomicBoolean txnFail = new AtomicBoolean(false);
    AtomicInteger callbacksReceived = new AtomicInteger(0);
    AtomicInteger callbacksExpected = new AtomicInteger(0);
    final Lock lock = new ReentrantLock();
    final Condition condition = lock.newCondition();

    /*
     * Callbacks can be reused per transaction, since they share the same
     * locks and conditions.
     */
    Callback<Object, Object> putSuccessCallback =
        new SuccessCallback<Object, Object>(lock, callbacksReceived, condition);
    Callback<Object, Object> putFailureCallback =
        new FailureCallback<Object, Object>(lock, callbacksReceived, txnFail, condition);
    Callback<Long, Long> incrementSuccessCallback =
        new SuccessCallback<Long, Long>(lock, callbacksReceived, condition);
    Callback<Long, Long> incrementFailureCallback =
        new FailureCallback<Long, Long>(lock, callbacksReceived, txnFail, condition);

    Status status = Status.READY;
    Channel channel = getChannel();
    int i = 0;
    try {
      txn = channel.getTransaction();
      txn.begin();
      for (; i < batchSize; i++) {
        Event event = channel.take();
        if (event == null) {
          status = Status.BACKOFF;
          if (i == 0) {
            sinkCounter.incrementBatchEmptyCount();
          } else {
            sinkCounter.incrementBatchUnderflowCount();
          }
          break;
        } else {
          serializer.setEvent(event);
          List<PutRequest> actions = serializer.getActions();
          List<AtomicIncrementRequest> increments = serializer.getIncrements();
          callbacksExpected.addAndGet(actions.size() + increments.size());

          for (PutRequest action : actions) {
            client.put(action).addCallbacks(putSuccessCallback, putFailureCallback);
          }
          for (AtomicIncrementRequest increment : increments) {
            client.atomicIncrement(increment).addCallbacks(
                incrementSuccessCallback, incrementFailureCallback);
          }
        }
      }
    } catch (Throwable e) {
      this.handleTransactionFailure(txn);
      this.checkIfChannelExceptionAndThrow(e);
    }
    if (i == batchSize) {
      sinkCounter.incrementBatchCompleteCount();
    }
    sinkCounter.addToEventDrainAttemptCount(i);

    lock.lock();
    try {
      // wait until all callbacks have come back, the transaction fails, or we time out
      while ((callbacksReceived.get() < callbacksExpected.get()) && !txnFail.get()) {
        try {
          if (!condition.await(timeout, TimeUnit.MILLISECONDS)) {
            txnFail.set(true);
            logger.warn("HBase callbacks timed out. "
                + "Transaction will be rolled back.");
          }
        } catch (Exception ex) {
          logger.error("Exception while waiting for callbacks from HBase.");
          this.handleTransactionFailure(txn);
          Throwables.propagate(ex);
        }
      }
    } finally {
      lock.unlock();
    }

    /*
     * At this point, either the txn has failed, or all callbacks have been
     * received and the txn is successful.
     *
     * This need not be in the monitor, since all callbacks for this txn
     * have been received. So txnFail will not be modified any more (even if
     * it is, it is set from true to true only - false happens only
     * in the next process call).
     */
    if (txnFail.get()) {
      this.handleTransactionFailure(txn);
      throw new EventDeliveryException("Could not write events to Hbase. "
          + "Transaction failed, and rolled back.");
    } else {
      try {
        txn.commit();
        txn.close();
        sinkCounter.addToEventDrainSuccessCount(i);
      } catch (Throwable e) {
        this.handleTransactionFailure(txn);
        this.checkIfChannelExceptionAndThrow(e);
      }
    }
    return status;
  }
}
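The SuccessCallback and FailureCallback classes are not shown above. Here is a hedged sketch of what the success side could look like, assuming the asynchbase Callback interface (com.stumbleupon.async.Callback): each completed put or increment bumps the shared counter and signals the condition that the process() thread is waiting on. The field names mirror the constructor arguments used above; the body is an assumption, not the actual Flume source.

// Sketch only: the real implementation may differ.
private static class SuccessCallback<R, T> implements Callback<R, T> {
  private final Lock lock;
  private final AtomicInteger callbacksReceived;
  private final Condition condition;

  SuccessCallback(Lock lock, AtomicInteger callbacksReceived, Condition condition) {
    this.lock = lock;
    this.callbacksReceived = callbacksReceived;
    this.condition = condition;
  }

  @Override
  public R call(T arg) throws Exception {
    callbacksReceived.incrementAndGet(); // one more put/increment acknowledged
    lock.lock();
    try {
      condition.signal(); // wake process(), which re-checks received vs. expected
    } finally {
      lock.unlock();
    }
    return null;
  }
}

The FailureCallback would presumably also set txnFail to true before signalling, which is what makes process() give up waiting and roll the transaction back.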
/**
 * <p>
 * A {@link Sink} implementation that can send events to an RPC server (such as
 * Flume's {@link AvroSource}).
 * </p>
 * <p>
 * This sink forms one half of Flume's tiered collection support. Events sent to
 * this sink are transported over the network to the hostname / port pair using
 * the RPC implementation encapsulated in {@link RpcClient}.
 * The destination is an instance of Flume's {@link AvroSource}, which
 * allows Flume agents to forward to other Flume agents, forming a tiered
 * collection infrastructure. Of course, nothing prevents one from using this
 * sink to speak to other custom built infrastructure that implements the same
 * RPC protocol.
 * </p>
 * <p>
 * Events are taken from the configured {@link Channel} in batches of the
 * configured <tt>batch-size</tt>. The batch size has no theoretical limits
 * although all events in the batch <b>must</b> fit in memory. Generally, larger
 * batches are far more efficient, but introduce a slight delay (measured in
 * millis) in delivery. The batch behavior is such that underruns (i.e. batches
 * smaller than the configured batch size) are possible. This is a compromise
 * made to maintain low latency of event delivery. If the channel returns a null
 * event, meaning it is empty, the batch is immediately sent, regardless of
 * size. Batch underruns are tracked in the metrics. Empty batches do not incur
 * an RPC roundtrip.
 * </p>
 */
public class AvroSink extends AbstractSink implements Configurable {

  @Override
  public Status process() throws EventDeliveryException {
    Status status = Status.READY;
    Channel channel = getChannel();
    Transaction transaction = channel.getTransaction();

    try {
      transaction.begin();
      verifyConnection();

      List<Event> batch = Lists.newLinkedList();
      for (int i = 0; i < client.getBatchSize(); i++) {
        Event event = channel.take();
        if (event == null) {
          break;
        }
        batch.add(event);
      }

      int size = batch.size();
      int batchSize = client.getBatchSize();
      if (size == 0) {
        sinkCounter.incrementBatchEmptyCount();
        status = Status.BACKOFF;
      } else {
        if (size < batchSize) {
          sinkCounter.incrementBatchUnderflowCount();
        } else {
          sinkCounter.incrementBatchCompleteCount();
        }
        sinkCounter.addToEventDrainAttemptCount(size);
        client.appendBatch(batch);
      }

      transaction.commit();
      sinkCounter.addToEventDrainSuccessCount(size);
    } catch (Throwable t) {
      transaction.rollback();
      if (t instanceof Error) {
        throw (Error) t;
      } else if (t instanceof ChannelException) {
        logger.error("Avro Sink " + getName() + ": Unable to get event from"
            + " channel " + channel.getName() + ". Exception follows.", t);
        status = Status.BACKOFF;
      } else {
        destroyConnection();
        throw new EventDeliveryException("Failed to send events", t);
      }
    } finally {
      transaction.close();
    }

    return status;
  }
}
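For reference, the same RpcClient API that AvroSink wraps can also be used directly from application code to push events into a downstream agent's AvroSource. A hedged sketch (host name and port are placeholders):

import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.List;

import org.apache.flume.Event;
import org.apache.flume.EventDeliveryException;
import org.apache.flume.api.RpcClient;
import org.apache.flume.api.RpcClientFactory;
import org.apache.flume.event.EventBuilder;

public class AvroClientSketch {
  public static void main(String[] args) throws EventDeliveryException {
    // connect to the AvroSource of a downstream agent (placeholder host/port)
    RpcClient client = RpcClientFactory.getDefaultInstance("collector.example.com", 4141);
    try {
      List<Event> batch = new ArrayList<Event>();
      for (int i = 0; i < 10; i++) {
        batch.add(EventBuilder.withBody(("event-" + i).getBytes(Charset.forName("UTF-8"))));
      }
      client.appendBatch(batch); // one RPC round trip for the whole batch
    } finally {
      client.close();
    }
  }
}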
/**
 * A simple sink which reads events from a channel and writes them to HBase.
 * The HBase configuration is picked up from the first <tt>hbase-site.xml</tt>
 * encountered in the classpath. This sink supports batch reading of events
 * from the channel, and writing them to HBase, to minimize the number of
 * flushes on the HBase tables. To use this sink, it has to be configured
 * with certain mandatory parameters:<p>
 * <tt>table: </tt> The name of the table in Hbase to write to.<p>
 * <tt>columnFamily: </tt> The column family in Hbase to write to.<p>
 * This sink will commit each transaction if the table's write buffer size is
 * reached or if the number of events in the current transaction reaches the
 * batch size, whichever comes first.<p>
 * Other optional parameters are:<p>
 * <tt>serializer:</tt> A class implementing {@link HBaseEventSerializer}.
 * An instance of this class will be used to write out events to HBase.<p>
 * <tt>serializer.*:</tt> Passed in the configure() method to serializer
 * as an object of {@link org.apache.flume.Context}.<p>
 * <tt>batchSize: </tt> This is the batch size used by the client. This is the
 * maximum number of events the sink will commit per transaction. The default
 * batch size is 100 events.<p>
 */
public class HBaseSink extends AbstractSink implements Configurable {

  @Override
  public Status process() throws EventDeliveryException {
    Status status = Status.READY;
    Channel channel = getChannel();
    Transaction txn = channel.getTransaction();
    List<Row> actions = new LinkedList<Row>();
    List<Increment> incs = new LinkedList<Increment>();
    txn.begin();
    for (long i = 0; i < batchSize; i++) {
      Event event = channel.take();
      if (event == null) {
        status = Status.BACKOFF;
        counterGroup.incrementAndGet("channel.underflow");
        break;
      } else {
        serializer.initialize(event, columnFamily);
        actions.addAll(serializer.getActions());
        incs.addAll(serializer.getIncrements());
      }
    }
    putEventsAndCommit(actions, incs, txn);
    return status;
  }

  private void putEventsAndCommit(List<Row> actions, List<Increment> incs,
      Transaction txn) throws EventDeliveryException {
    try {
      table.batch(actions);
      for (Increment i : incs) {
        table.increment(i);
      }
      txn.commit();
      counterGroup.incrementAndGet("transaction.success");
    } catch (Throwable e) {
      try {
        txn.rollback();
      } catch (Exception e2) {
        logger.error("Exception in rollback. Rollback might not have been "
            + "successful.", e2);
      }
      counterGroup.incrementAndGet("transaction.rollback");
      logger.error("Failed to commit transaction. "
          + "Transaction rolled back.", e);
      if (e instanceof Error || e instanceof RuntimeException) {
        Throwables.propagate(e);
      } else {
        throw new EventDeliveryException("Failed to commit transaction. "
            + "Transaction rolled back.", e);
      }
    } finally {
      txn.close();
    }
  }
}
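The serializer referenced above decides how an event turns into HBase Puts and Increments. Below is a hedged sketch of a minimal serializer that stores the raw event body in a single column, assuming the HbaseEventSerializer interface from Flume's HBase sink package (initialize/getActions/getIncrements/close plus the two configure methods). The class name, row key scheme, and column qualifier are made up for illustration.

import java.util.Collections;
import java.util.List;

import org.apache.flume.Context;
import org.apache.flume.Event;
import org.apache.flume.conf.ComponentConfiguration;
import org.apache.flume.sink.hbase.HbaseEventSerializer;
import org.apache.hadoop.hbase.client.Increment;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Row;
import org.apache.hadoop.hbase.util.Bytes;

// Illustrative only: writes the raw event body into <columnFamily>:payload.
public class BodyOnlyHbaseSerializer implements HbaseEventSerializer {
  private byte[] columnFamily;
  private Event event;

  @Override
  public void configure(Context context) { /* no settings in this sketch */ }

  @Override
  public void configure(ComponentConfiguration conf) { }

  @Override
  public void initialize(Event event, byte[] columnFamily) {
    this.event = event;
    this.columnFamily = columnFamily;
  }

  @Override
  public List<Row> getActions() {
    // made-up row key: nanosecond timestamp; real serializers usually derive it from the event
    Put put = new Put(Bytes.toBytes(System.nanoTime()));
    put.add(columnFamily, Bytes.toBytes("payload"), event.getBody());
    return Collections.<Row>singletonList(put);
  }

  @Override
  public List<Increment> getIncrements() {
    return Collections.<Increment>emptyList(); // no counter columns in this sketch
  }

  @Override
  public void close() { }
}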
public class HDFSEventSink extends AbstractSink implements Configurable {

  /**
   * Pull events out of the channel and send them to HDFS. Take at most
   * txnEventMax events (the maximum number of events taken from the channel
   * in a given transaction), find the corresponding bucket for each event,
   * ensure the file is open, then extract the payload and append it to the
   * HDFS file. <br />
   * WARNING: NOT THREAD SAFE
   */
  @Override
  public Status process() throws EventDeliveryException {
    Channel channel = getChannel();
    Transaction transaction = channel.getTransaction();
    List<BucketWriter> writers = Lists.newArrayList();
    transaction.begin();
    try {
      Event event = null;
      int txnEventCount = 0;
      for (txnEventCount = 0; txnEventCount < txnEventMax; txnEventCount++) {
        event = channel.take();
        if (event == null) {
          break;
        }

        // reconstruct the path name by substituting place holders
        String realPath = BucketPath.escapeString(path, event.getHeaders(),
            needRounding, roundUnit, roundValue);
        BucketWriter bucketWriter = sfWriters.get(realPath);

        // we haven't seen this file yet, so open it and cache the handle
        if (bucketWriter == null) {
          HDFSWriter hdfsWriter = writerFactory.getWriter(fileType);
          FlumeFormatter formatter = HDFSFormatterFactory
              .getFormatter(writeFormat);
          bucketWriter = new BucketWriter(rollInterval, rollSize, rollCount,
              batchSize, context, realPath, codeC, compType, hdfsWriter,
              formatter, timedRollerPool, proxyTicket, sinkCounter);
          sfWriters.put(realPath, bucketWriter);
        }

        // track the buckets getting written in this transaction
        if (!writers.contains(bucketWriter)) {
          writers.add(bucketWriter);
        }

        // Write the data to HDFS
        append(bucketWriter, event);
      }

      if (txnEventCount == 0) {
        sinkCounter.incrementBatchEmptyCount();
      } else if (txnEventCount == txnEventMax) {
        sinkCounter.incrementBatchCompleteCount();
      } else {
        sinkCounter.incrementBatchUnderflowCount();
      }

      // flush all pending buckets before committing the transaction
      for (BucketWriter bucketWriter : writers) {
        if (!bucketWriter.isBatchComplete()) {
          flush(bucketWriter);
        }
      }

      transaction.commit();

      if (txnEventCount > 0) {
        sinkCounter.addToEventDrainSuccessCount(txnEventCount);
      }

      if (event == null) {
        return Status.BACKOFF;
      }
      return Status.READY;
    } catch (IOException eIO) {
      transaction.rollback();
      LOG.warn("HDFS IO error", eIO);
      return Status.BACKOFF;
    } catch (Throwable th) {
      transaction.rollback();
      LOG.error("process failed", th);
      if (th instanceof Error) {
        throw (Error) th;
      } else {
        throw new EventDeliveryException(th);
      }
    } finally {
      transaction.close();
    }
  }
}
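The realPath construction is the interesting part: BucketPath.escapeString replaces %{header} escapes with event header values and date escapes (%Y, %m, %d, ...) from the event's timestamp header, and each distinct resulting path gets its own cached BucketWriter. A small illustrative sketch using the simpler two-argument overload of escapeString (the header values are made up):

import java.util.HashMap;
import java.util.Map;

import org.apache.flume.formatter.output.BucketPath;

public class BucketPathSketch {
  public static void main(String[] args) {
    Map<String, String> headers = new HashMap<String, String>();
    headers.put("host", "web01");                                         // made-up header value
    headers.put("timestamp", String.valueOf(System.currentTimeMillis())); // used by %Y/%m/%d

    String path = "/flume/events/%{host}/%Y-%m-%d";
    String realPath = BucketPath.escapeString(path, headers);
    // e.g. "/flume/events/web01/2012-08-20" -- one BucketWriter is cached per such path
    System.out.println(realPath);
  }
}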
// IRC (Internet Relay Chat) is a text-based network protocol; this sink sends each event as one line to an IRC server.
public class IRCSink extends AbstractSink implements Configurable {

  @Override
  public Status process() throws EventDeliveryException {
    Status status = Status.READY;
    Channel channel = getChannel();
    Transaction transaction = channel.getTransaction();

    try {
      transaction.begin();
      createConnection();

      Event event = channel.take();
      if (event == null) {
        counterGroup.incrementAndGet("event.empty");
        status = Status.BACKOFF;
      } else {
        sendLine(event);
        counterGroup.incrementAndGet("event.irc");
      }

      transaction.commit();
    } catch (ChannelException e) {
      transaction.rollback();
      logger.error("Unable to get event from channel. Exception follows.", e);
      status = Status.BACKOFF;
    } catch (Exception e) {
      transaction.rollback();
      logger.error("Unable to communicate with IRC server. Exception follows.", e);
      status = Status.BACKOFF;
      destroyConnection();
    } finally {
      transaction.close();
    }

    return status;
  }
}
public class LoggerSink extends AbstractSink {

  private static final Logger logger = LoggerFactory.getLogger(LoggerSink.class);

  @Override
  public Status process() throws EventDeliveryException {
    Status result = Status.READY;
    Channel channel = getChannel();
    Transaction transaction = channel.getTransaction();
    Event event = null;

    try {
      transaction.begin();
      event = channel.take();

      if (event != null) {
        if (logger.isInfoEnabled()) {
          logger.info("Event: " + EventHelper.dumpEvent(event));
        }
      } else {
        // No event found, request back-off semantics from the sink runner
        result = Status.BACKOFF;
      }
      transaction.commit();
    } catch (Exception ex) {
      transaction.rollback();
      throw new EventDeliveryException("Failed to log event: " + event, ex);
    } finally {
      transaction.close();
    }

    return result;
  }
}
RollingFileSink rolls to a new file every sink.rollInterval and stores subsequent events in that new file; a sketch of how this rotation could be scheduled follows the code below.
public class RollingFileSink extends AbstractSink implements Configurable {

  @Override
  public Status process() throws EventDeliveryException {
    if (shouldRotate) {
      logger.debug("Time to rotate {}", pathController.getCurrentFile());

      if (outputStream != null) {
        logger.debug("Closing file {}", pathController.getCurrentFile());

        try {
          serializer.flush();
          serializer.beforeClose();
          outputStream.flush();
          outputStream.close();
          shouldRotate = false;
        } catch (IOException e) {
          throw new EventDeliveryException("Unable to rotate file "
              + pathController.getCurrentFile() + " while delivering event", e);
        }

        serializer = null;
        outputStream = null;
        pathController.rotate();
      }
    }

    if (outputStream == null) {
      File currentFile = pathController.getCurrentFile();
      logger.debug("Opening output stream for file {}", currentFile);
      try {
        outputStream = new BufferedOutputStream(new FileOutputStream(currentFile));
        serializer = EventSerializerFactory.getInstance(
            serializerType, serializerContext, outputStream);
        serializer.afterCreate();
      } catch (IOException e) {
        throw new EventDeliveryException("Failed to open file "
            + pathController.getCurrentFile() + " while delivering event", e);
      }
    }

    Channel channel = getChannel();
    Transaction transaction = channel.getTransaction();
    Event event = null;
    Status result = Status.READY;

    try {
      transaction.begin();
      event = channel.take();

      if (event != null) {
        serializer.write(event);

        /*
         * FIXME: Feature: Rotate on size and time by checking bytes written and
         * setting shouldRotate = true if we're past a threshold.
         */

        /*
         * FIXME: Feature: Control flush interval based on time or number of
         * events. For now, we're super-conservative and flush on each write.
         */
        serializer.flush();
        outputStream.flush();
      } else {
        // No events found, request back-off semantics from runner
        result = Status.BACKOFF;
      }
      transaction.commit();
    } catch (Exception ex) {
      transaction.rollback();
      throw new EventDeliveryException("Failed to process event: " + event, ex);
    } finally {
      transaction.close();
    }

    return result;
  }
}
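The process() method above only reads shouldRotate; the flag is flipped elsewhere. Here is a hedged sketch of how the rollInterval-driven rotation could be wired up in the sink's start() method, assuming a ScheduledExecutorService field (the rollService field name is an assumption; java.util.concurrent imports are required).

// Sketch only: a scheduled task flips shouldRotate every rollInterval seconds,
// and the next process() call then closes the current file and rotates the path.
private ScheduledExecutorService rollService;

@Override
public synchronized void start() {
  super.start();
  rollService = Executors.newScheduledThreadPool(1);
  rollService.scheduleAtFixedRate(new Runnable() {
    @Override
    public void run() {
      shouldRotate = true; // picked up at the top of process()
    }
  }, rollInterval, rollInterval, TimeUnit.SECONDS);
}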