在上篇博客中,我们使用 Linux 原生的 tail,启动多个线程,做到对多个文件的数据收集。但是,随着业务日志增多,日志按日期分割是个常用的做法,这样一来,我们的 flume 插件就会在日期切换的凌晨出现 IO 断裂,造成不能继续收集数据。解决这个问题的思路大概有两种:
1,tail -f 替换成 tail -F
2,使用java读取文件,判断是否为新文件
结合难易程度与可定制性,我们选择第二种方案。因为在第二种方案中,我们可以做自定义的断点续传,具体思路如下:
/*
* 作者:许恕
* 时间:2016年5月3日
* 功能:实现tail 某目录下的所有符合正则条件的文件
* Email:[email protected]
* To detect all files in a folder
*/
package org.apache.flume.source;
import com.google.common.base.Preconditions;
import com.google.common.util.concurrent.ThreadFactoryBuilder;
import org.apache.flume.Context;
import org.apache.flume.Event;
import org.apache.flume.EventDrivenSource;
import org.apache.flume.SystemClock;
import org.apache.flume.channel.ChannelProcessor;
import org.apache.flume.conf.Configurable;
import org.apache.flume.event.EventBuilder;
import org.apache.flume.instrumentation.SourceCounter;
import org.apache.flume.tools.HostUtils;
import org.mortbay.util.ajax.JSON;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.*;
import java.nio.charset.Charset;
import java.util.*;
import java.util.concurrent.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* step:
* 1,config one path
* 2,find all file with RegExp
* 3,tail one children file
* 4,batch to channel
*
* demo:
* demo.sources.s1.type = org.apache.flume.source.ExecTailSource
* demo.sources.s1.filepath=/export/home/tomcat/logs/auth.el.net/
* demo.sources.s1.filenameRegExp=(.log{1})$
* demo.sources.s1.tailing=true
* demo.sources.s1.readinterval=300
* demo.sources.s1.startAtBeginning=false
* demo.sources.s1.restart=true
*/
/**
 * Flume source that tails every file under a configured directory whose name
 * matches {@code filenameRegExp}. One worker thread ({@link ExecRunnable}) is
 * started per matched file; each worker polls its file, wraps every new line
 * in a JSON envelope with host/file metadata, and delivers batches of events
 * to the channel. Workers survive file truncation/rotation by reopening the
 * file and resetting their read pointer.
 */
public class ExecTailSource extends AbstractSource implements EventDrivenSource,
    Configurable {

  private static final Logger logger = LoggerFactory
      .getLogger(ExecTailSource.class);

  private SourceCounter sourceCounter;
  private ExecutorService executor;
  private List<ExecRunnable> listRuners;
  private List<Future<?>> listFuture;
  private long restartThrottle;
  private boolean restart = true;
  private boolean logStderr;
  private Integer bufferCount;
  private long batchTimeout;
  private Charset charset;
  // Directory to scan for log files.
  private String filepath;
  // Regex a file name must match (via find(), i.e. partial match) to be tailed.
  private String filenameRegExp;
  private boolean tailing;
  private Integer readinterval;
  private boolean startAtBeginning;

  /**
   * Scans {@link #filepath} for matching files and starts one tail worker per
   * file. Fails fast (IllegalStateException) when no file matches.
   */
  @Override
  public void start() {
    logger.info("=start=> flume tail source start begin time:" + new Date().toString());
    logger.info("ExecTail source starting with filepath:{}", filepath);

    List<String> listFiles = getFileList(filepath);
    // No need to wrap this in an if: checkState itself throws when the
    // condition is false. (Also fixes the "fiels" typo in the message.)
    Preconditions.checkState(listFiles != null && !listFiles.isEmpty(),
        "The filepath's file not have files with filenameRegExp");

    executor = Executors.newFixedThreadPool(listFiles.size());
    listRuners = new ArrayList<ExecRunnable>();
    listFuture = new ArrayList<Future<?>>();

    logger.info("files size is {} ", listFiles.size());
    // FIXME: Use a callback-like executor / future to signal us upon failure.
    for (String oneFilePath : listFiles) {
      ExecRunnable runner = new ExecRunnable(getChannelProcessor(), sourceCounter,
          restart, restartThrottle, logStderr, bufferCount, batchTimeout, charset,
          oneFilePath, tailing, readinterval, startAtBeginning);
      listRuners.add(runner);
      Future<?> runnerFuture = executor.submit(runner);
      listFuture.add(runnerFuture);
      logger.info("{} is begin running", oneFilePath);
    }

    /*
     * NB: This comes at the end rather than the beginning of the method because
     * it sets our state to running. We want to make sure the executor is alive
     * and well first.
     */
    sourceCounter.start();
    super.start();
    logger.info("=start=> flume tail source start end time:" + new Date().toString());
    logger.debug("ExecTail source started");
  }

  /**
   * Stops all workers: disables their restart loop, kills their flush
   * schedulers, cancels their futures, then shuts the thread pool down.
   */
  @Override
  public void stop() {
    logger.info("=stop=> flume tail source stop begin time:" + new Date().toString());

    if (listRuners != null && !listRuners.isEmpty()) {
      for (ExecRunnable oneRunner : listRuners) {
        if (oneRunner != null) {
          // Clear the restart flag first so the worker's do/while loop exits.
          oneRunner.setRestart(false);
          oneRunner.kill();
        }
      }
    }

    if (listFuture != null && !listFuture.isEmpty()) {
      for (Future<?> oneFuture : listFuture) {
        if (oneFuture != null) {
          logger.debug("Stopping ExecTail runner");
          oneFuture.cancel(true);
          logger.debug("ExecTail runner stopped");
        }
      }
    }

    // Guard: stop() may be called even though start() never ran (e.g. a
    // configuration failure), in which case executor is still null.
    if (executor != null) {
      executor.shutdown();
      while (!executor.isTerminated()) {
        logger.debug("Waiting for ExecTail executor service to stop");
        try {
          executor.awaitTermination(500, TimeUnit.MILLISECONDS);
        } catch (InterruptedException e) {
          logger.debug("Interrupted while waiting for ExecTail executor service "
              + "to stop. Just exiting.");
          Thread.currentThread().interrupt();
        }
      }
    }

    sourceCounter.stop();
    super.stop();
    logger.info("=stop=> flume tail source stop end time:" + new Date().toString());
  }

  /**
   * Reads the source configuration. {@code filepath} and {@code filenameRegExp}
   * are mandatory; everything else falls back to the defaults declared in
   * {@code ExecSourceConfigurationConstants}.
   */
  @Override
  public void configure(Context context) {
    filepath = context.getString("filepath");
    Preconditions.checkState(filepath != null,
        "The parameter filepath must be specified");
    logger.info("The parameter filepath is {}", filepath);

    filenameRegExp = context.getString("filenameRegExp");
    Preconditions.checkState(filenameRegExp != null,
        "The parameter filenameRegExp must be specified");
    // Validate the pattern now so a bad regex fails at configuration time
    // instead of when start() first scans the directory.
    Pattern.compile(filenameRegExp);
    logger.info("The parameter filenameRegExp is {}", filenameRegExp);

    restartThrottle = context.getLong(ExecSourceConfigurationConstants.CONFIG_RESTART_THROTTLE,
        ExecSourceConfigurationConstants.DEFAULT_RESTART_THROTTLE);
    tailing = context.getBoolean(ExecSourceConfigurationConstants.CONFIG_TAILING_THROTTLE,
        ExecSourceConfigurationConstants.DEFAULT_ISTAILING_TRUE);
    readinterval = context.getInteger(ExecSourceConfigurationConstants.CONFIG_READINTERVAL_THROTTLE,
        ExecSourceConfigurationConstants.DEFAULT_READINTERVAL);
    startAtBeginning = context.getBoolean(ExecSourceConfigurationConstants.CONFIG_STARTATBEGINNING_THROTTLE,
        ExecSourceConfigurationConstants.DEFAULT_STARTATBEGINNING);
    restart = context.getBoolean(ExecSourceConfigurationConstants.CONFIG_RESTART,
        ExecSourceConfigurationConstants.DEFAULT_RESTART_TRUE);
    logStderr = context.getBoolean(ExecSourceConfigurationConstants.CONFIG_LOG_STDERR,
        ExecSourceConfigurationConstants.DEFAULT_LOG_STDERR);
    bufferCount = context.getInteger(ExecSourceConfigurationConstants.CONFIG_BATCH_SIZE,
        ExecSourceConfigurationConstants.DEFAULT_BATCH_SIZE);
    batchTimeout = context.getLong(ExecSourceConfigurationConstants.CONFIG_BATCH_TIME_OUT,
        ExecSourceConfigurationConstants.DEFAULT_BATCH_TIME_OUT);
    charset = Charset.forName(context.getString(ExecSourceConfigurationConstants.CHARSET,
        ExecSourceConfigurationConstants.DEFAULT_CHARSET));

    if (sourceCounter == null) {
      sourceCounter = new SourceCounter(getName());
    }
  }

  /**
   * Recursively collects the absolute paths of all regular files under
   * {@code dir} whose names match {@link #filenameRegExp}, ordered by
   * last-modified time (oldest first).
   *
   * @param dir directory to scan
   * @return matching file paths; empty list when {@code dir} is not a
   *         directory or contains no matching files
   */
  public List<String> getFileList(String dir) {
    List<String> listFile = new ArrayList<String>();
    File dirFile = new File(dir);
    if (dirFile.isDirectory()) {
      File[] files = dirFile.listFiles();
      if (null != files && files.length > 0) {
        // Sort by modification time. Long.compare avoids the wrap-around the
        // old "(int) (t1 - t2)" subtraction suffered for distant timestamps,
        // and the bogus always-true equals(Object) override is gone.
        Arrays.sort(files, new Comparator<File>() {
          public int compare(File f1, File f2) {
            return Long.compare(f1.lastModified(), f2.lastModified());
          }
        });
        for (File file : files) {
          if (!file.isDirectory()) {
            String oneFileName = file.getName();
            if (match(filenameRegExp, oneFileName)) {
              listFile.add(file.getAbsolutePath());
              logger.info("filename:{} is pass", oneFileName);
            }
          } else {
            // Recurse into sub-directories.
            listFile.addAll(getFileList(file.getAbsolutePath()));
          }
        }
      }
    } else {
      logger.info("FilePath:{} is not Directory", dir);
    }
    return listFile;
  }

  /**
   * @param regex regular expression string
   * @param str   candidate string
   * @return true when {@code str} contains a match for {@code regex}
   *         (partial match via {@link Matcher#find()}, not a full match)
   */
  private boolean match(String regex, String str) {
    // Only invoked during the start() directory scan, so compiling the
    // pattern per call is acceptable here.
    Matcher matcher = Pattern.compile(regex).matcher(str);
    return matcher.find();
  }

  /**
   * Worker that tails one file: polls for growth, reopens on truncation or
   * rotation, and pushes JSON-wrapped lines to the channel in batches of
   * {@code bufferCount} or every {@code batchTimeout} milliseconds.
   */
  private static class ExecRunnable implements Runnable {

    public ExecRunnable(ChannelProcessor channelProcessor,
        SourceCounter sourceCounter, boolean restart, long restartThrottle,
        boolean logStderr, int bufferCount, long batchTimeout, Charset charset,
        String filepath, boolean tailing, Integer readinterval, boolean startAtBeginning) {
      this.channelProcessor = channelProcessor;
      this.sourceCounter = sourceCounter;
      this.restartThrottle = restartThrottle;
      this.bufferCount = bufferCount;
      this.batchTimeout = batchTimeout;
      this.restart = restart;
      this.logStderr = logStderr;
      this.charset = charset;
      this.filepath = filepath;
      this.logfile = new File(filepath);
      this.tailing = tailing;
      this.readinterval = readinterval;
      this.startAtBeginning = startAtBeginning;
    }

    private final ChannelProcessor channelProcessor;
    private final SourceCounter sourceCounter;
    private volatile boolean restart;
    private final long restartThrottle;
    private final int bufferCount;
    private long batchTimeout;
    private final boolean logStderr;
    private final Charset charset;
    private SystemClock systemClock = new SystemClock();
    private Long lastPushToChannel = systemClock.currentTimeMillis();
    ScheduledExecutorService timedFlushService;
    ScheduledFuture<?> future;
    private String filepath;
    // How long (ms) to sleep after reaching end-of-file before polling again.
    private long readinterval = 500;
    // The file being tailed.
    private File logfile;
    // Whether to read from the beginning of the file instead of the tail.
    private boolean startAtBeginning = false;
    // Run flag; cleared by kill() to make the tail loop exit.
    private boolean tailing = false;

    /**
     * Derives a "domain" label from the second-to-last path segment, e.g.
     * "/logs/auth.el.net/x.log" -> "auth.el.net". Falls back to the whole
     * path when no such segment exists.
     */
    private static String getDomain(String filePath) {
      String[] strs = filePath.split("/");
      // Guard: paths with fewer than two segments used to throw
      // ArrayIndexOutOfBoundsException here.
      if (strs.length < 2) {
        return filePath;
      }
      String domain = strs[strs.length - 2];
      if (domain == null || domain.isEmpty()) {
        domain = filePath;
      }
      return domain;
    }

    @Override
    public void run() {
      do {
        logger.info("=run=> flume tail source run start time:" + new Date().toString());
        long filePointer;
        if (this.startAtBeginning) {
          filePointer = 0;
        } else {
          // Start from the current end of file: only new lines are collected.
          filePointer = this.logfile.length();
        }

        final List<Event> eventList = new ArrayList<Event>();

        timedFlushService = Executors.newSingleThreadScheduledExecutor(
            new ThreadFactoryBuilder().setNameFormat(
                "timedFlushExecService" +
                    Thread.currentThread().getId() + "-%d").build());

        RandomAccessFile randomAccessFile = null;
        try {
          randomAccessFile = new RandomAccessFile(logfile, "r");
          // Periodic flush so a slow file still delivers partial batches.
          future = timedFlushService.scheduleWithFixedDelay(new Runnable() {
            @Override
            public void run() {
              try {
                synchronized (eventList) {
                  if (!eventList.isEmpty() && timeout()) {
                    flushEventBatch(eventList);
                  }
                }
              } catch (Exception e) {
                logger.error("Exception occured when processing event batch", e);
                if (e instanceof InterruptedException) {
                  Thread.currentThread().interrupt();
                }
              }
            }
          }, batchTimeout, batchTimeout, TimeUnit.MILLISECONDS);

          while (this.tailing) {
            long fileLength = this.logfile.length();
            if (fileLength < filePointer) {
              // File shrank: it was truncated or rotated. Close the previous
              // handle before reopening — it used to leak on every rotation.
              randomAccessFile.close();
              randomAccessFile = new RandomAccessFile(logfile, "r");
              filePointer = 0;
            }
            if (fileLength > filePointer) {
              randomAccessFile.seek(filePointer);
              // NOTE(review): RandomAccessFile.readLine decodes bytes as
              // ISO-8859-1, so non-ASCII lines may be garbled regardless of
              // the configured charset — confirm with real log data.
              String line = randomAccessFile.readLine();
              while (line != null) {
                // Wrap the line in a JSON envelope and queue it for the channel.
                synchronized (eventList) {
                  sourceCounter.incrementEventReceivedCount();
                  HashMap<String, Object> body = new HashMap<String, Object>();
                  body.put("context", line);
                  body.put("filepath", filepath);
                  // NOTE(review): "contextType" duplicates "filepath" — looks
                  // intentional downstream but verify against consumers.
                  body.put("contextType", filepath);
                  body.put("created", System.currentTimeMillis());
                  body.put("localHostIp", HostUtils.getLocalHostIp());
                  body.put("localHostName", HostUtils.getLocalHostName());
                  body.put("domain", getDomain(filepath));
                  String bodyjson = JSON.toString(body);
                  Event oneEvent = EventBuilder.withBody(bodyjson.getBytes(charset));
                  eventList.add(oneEvent);
                  if (eventList.size() >= bufferCount || timeout()) {
                    flushEventBatch(eventList);
                  }
                }
                line = randomAccessFile.readLine();
              }
              filePointer = randomAccessFile.getFilePointer();
            }
            Thread.sleep(this.readinterval);
          }

          // Loop ended (kill() cleared tailing): push whatever is buffered.
          synchronized (eventList) {
            if (!eventList.isEmpty()) {
              flushEventBatch(eventList);
            }
          }
        } catch (Exception e) {
          logger.error("Failed while running filpath: " + filepath, e);
          if (e instanceof InterruptedException) {
            Thread.currentThread().interrupt();
          }
        } finally {
          if (randomAccessFile != null) {
            try {
              randomAccessFile.close();
            } catch (IOException ex) {
              logger.error("Failed to close reader for ExecTail source", ex);
            }
          }
          // Stop the flush scheduler here as well: it used to leak one
          // single-thread executor per restart iteration.
          if (future != null) {
            future.cancel(true);
          }
          if (timedFlushService != null) {
            timedFlushService.shutdown();
          }
        }

        logger.info("=run=> flume tail source run restart:" + restart);
        if (restart) {
          logger.info("=run=> flume tail source run restart time:" + new Date().toString());
          logger.info("Restarting in {}ms", restartThrottle);
          try {
            Thread.sleep(restartThrottle);
          } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
          }
        } else {
          logger.info("filepath [" + filepath + "] exited with restart[" + restart + "]");
        }
      } while (restart);
    }

    /** Hands the buffered events to the channel and resets the batch clock. */
    private void flushEventBatch(List<Event> eventList) {
      channelProcessor.processEventBatch(eventList);
      sourceCounter.addToEventAcceptedCount(eventList.size());
      eventList.clear();
      lastPushToChannel = systemClock.currentTimeMillis();
    }

    /** @return true when the current batch has been open for >= batchTimeout ms. */
    private boolean timeout() {
      return (systemClock.currentTimeMillis() - lastPushToChannel) >= batchTimeout;
    }

    // NOTE(review): unused leftover from the stock ExecSource; kept for
    // source-compatibility, candidate for removal.
    private static String[] formulateShellCommand(String shell, String command) {
      String[] shellArgs = shell.split("\\s+");
      String[] result = new String[shellArgs.length + 1];
      System.arraycopy(shellArgs, 0, result, 0, shellArgs.length);
      result[shellArgs.length] = command;
      return result;
    }

    /**
     * Stops this worker: clears the tail flag and shuts down the periodic
     * flush scheduler, waiting for it to terminate.
     *
     * @return Integer.MIN_VALUE on clean shutdown, Integer.MIN_VALUE / 2 when
     *         shutdown raised an exception
     */
    public int kill() {
      logger.info("=kill=> flume tail source kill start time:" + new Date().toString());
      this.tailing = false;
      // Lock on this instance: the old getClass() lock was shared by every
      // runner and needlessly serialized independent shutdowns while guarding
      // per-instance state (future, timedFlushService).
      synchronized (this) {
        try {
          // Stop the Thread that flushes periodically
          if (future != null) {
            future.cancel(true);
          }
          if (timedFlushService != null) {
            timedFlushService.shutdown();
            while (!timedFlushService.isTerminated()) {
              try {
                timedFlushService.awaitTermination(500, TimeUnit.MILLISECONDS);
              } catch (InterruptedException e) {
                logger.debug("Interrupted while waiting for ExecTail executor service "
                    + "to stop. Just exiting.");
                Thread.currentThread().interrupt();
              }
            }
          }
          logger.info("=kill=> flume tail source kill end time:" + new Date().toString());
          return Integer.MIN_VALUE;
        } catch (Exception ex) {
          logger.error("=kill=>", ex);
          Thread.currentThread().interrupt();
        }
      }
      logger.info("=kill=> flume tail source kill end time:" + new Date().toString());
      return Integer.MIN_VALUE / 2;
    }

    public void setRestart(boolean restart) {
      this.restart = restart;
    }
  }

  // NOTE(review): unused leftover from the stock ExecSource (nothing in this
  // file constructs it); kept for source-compatibility.
  private static class StderrReader extends Thread {
    private BufferedReader input;
    private boolean logStderr;

    protected StderrReader(BufferedReader input, boolean logStderr) {
      this.input = input;
      this.logStderr = logStderr;
    }

    @Override
    public void run() {
      try {
        int i = 0;
        String line = null;
        while ((line = input.readLine()) != null) {
          if (logStderr) {
            // There is no need to read 'line' with a charset
            // as we do not to propagate it.
            // It is in UTF-16 and would be printed in UTF-8 format.
            logger.info("StderrLogger[{}] = '{}'", ++i, line);
          }
        }
      } catch (IOException e) {
        logger.info("StderrLogger exiting", e);
      } finally {
        try {
          if (input != null) {
            input.close();
          }
        } catch (IOException ex) {
          logger.error("Failed to close stderr reader for ExecTail source", ex);
        }
      }
    }
  }
}
这次的经验是非常宝贵的。在这次的解决过程中,一开始没有思路,后来在沟通的过程中,一直都是怀疑自己的代码、怀疑 flume 的源码。直到某一天早上认真梳理了一下:每天深夜 23:59 会发生什么?除了文件发生了变化,其他都没有变化,那问题就好定位了——是我们将问题想复杂了。我们以后排查问题,还是要遵循单一变量原则,就可以很快定位出问题,引以为戒!