java远程实时监听linux服务器日志文件并写入kafka

主要需求:连接远程服务器(HBase写入端),监听HBase写入日志,截取ROWKEY,写入Kafka。

实现方式:通过ch.ethz.ganymed包ganymed-ssh2远程连接linux服务器,执行tail命令,生成实时数据流,写入kafka

主要代码:

pom.xml

		
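		<dependency>
			<groupId>org.apache.kafka</groupId>
			<artifactId>kafka-clients</artifactId>
			<version>0.10.2.0</version>
		</dependency>
		<dependency>
			<groupId>org.apache.kafka</groupId>
			<artifactId>kafka_2.11</artifactId>
			<version>0.10.2.0</version>
		</dependency>

		<!-- ganymed-ssh2:通过SSH远程连接linux服务器 -->
		<dependency>
			<groupId>ch.ethz.ganymed</groupId>
			<artifactId>ganymed-ssh2</artifactId>
			<version>build210</version>
		</dependency>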
		

主类:

/**  
 * Project Name:kafkademo  
 * File Name:TailLogToKafka.java  
 * Package Name:cmm.TailLogToKafka  
 * Date:2018年12月7日下午2:40:46  
 * Copyright (c) 2018, c2206190880@163.com All Rights Reserved.  
 *  
 */  

package cmm.TailLogToKafka;

import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.Properties;

import ch.ethz.ssh2.Connection;
import ch.ethz.ssh2.Session;
import ch.ethz.ssh2.StreamGobbler;
import cmm.util.PathUtil;

/**  
 * ClassName:TailLogToKafka 
* Function: TODO ADD FUNCTION.
* Reason: TODO ADD REASON.
* Date: 2018年12月7日 下午2:40:46
* @author mmchen * @version * @since JDK 1.7 * @see */ public class TailLogToKafka { private static Connection conn; private static int threadNum=0; private static String cmd = "echo no commond"; private static String topic = "cmmtest"; public static void main(String[] args) { Properties properties = new Properties(); try { InputStream in = new BufferedInputStream(new FileInputStream(new File(PathUtil.getProjectParentPath()+"/remote.properties"))); properties.load(in); } catch (IOException e1) { System.out.println("远程连接配置读取失败!!!"); e1.printStackTrace(); } //远程连接linux服务器 String ip = properties.getProperty("ip"); String usr = properties.getProperty("user"); String psword = properties.getProperty("password"); cmd = properties.getProperty("shell"); topic = properties.getProperty("topic"); //创建远程连接,默认连接端口为22,如果不使用默认,可以使用方法 //new Connection(ip, port)创建对象 conn = new Connection(ip); try { //连接远程服务器 conn.connect(); //使用用户名和密码登录 conn.authenticateWithPassword(usr, psword); } catch (IOException e) { System.err.printf("用户%s密码%s登录服务器%s失败!", usr, psword, ip); e.printStackTrace(); } //创建线程,执行shell命令,获取实时数据流,写入kafka threadNum=1; new Thread(new Runnable() { @Override public void run() { try { Session session = conn.openSession(); session.execCommand(cmd); InputStream out = new StreamGobbler(session.getStdout()); BufferedReader outBufferedReader = new BufferedReader(new InputStreamReader(out)); myProducer producerDemo = new myProducer(); while (true) { String line = outBufferedReader.readLine(); if (line == null) { threadNum=0; outBufferedReader.close(); session.close(); conn.close(); break; } System.out.println(line); //数据写入kafka producerDemo.produce(topic,line); } } catch (IOException e) { System.out.println("open session fail"); e.printStackTrace(); } } }).start(); while (threadNum>0) { Thread.yield(); } } }

Kafka生产者:

/**  
 * Project Name:kafkademo  
 * File Name:myProducer.java  
 * Package Name:cmm.TailLogToKafka  
 * Date:2018年12月6日下午2:40:28  
 * Copyright (c) 2018, c2206190880@163.com All Rights Reserved.  
 *  
 */  

package cmm.TailLogToKafka;

import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Properties;

import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;

import cmm.util.PathUtil;

/**  
 * ClassName:Producer01 
* Function: TODO ADD FUNCTION.
* Reason: TODO ADD REASON.
* Date: 2018年12月6日 下午2:40:28
* @author mmchen * @version * @since JDK 1.7 * @see */ public class myProducer { private final KafkaProducer producer; public myProducer() throws IOException { Properties props = new Properties(); InputStream in = new BufferedInputStream(new FileInputStream(new File(PathUtil.getProjectParentPath()+"/myProducer.properties"))); props.load(in); producer = new KafkaProducer(props); } public void produce(String topic,String data) { producer.send(new ProducerRecord(topic, data)); } public static void main(String[] args) { try { new myProducer().produce("",""); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } } }

properties配置文件:

myProducer.properties

#服务器ip
bootstrap.servers=xxx:9092,1xxx:9092,xxx:9092
#所有follower都响应了才认为消息提交成功,即"committed"
acks=all
#发送失败后的重试次数:0 表示不重试;若设为很大的值(如 Integer.MAX_VALUE)则近似无限重试,直到你意识到出现了问题:)
retries=0
#producer将试图批处理消息记录,以减少请求次数.默认的批量处理消息字节数
#batch.size当批量的数据大小达到设定值后,就会立即发送,不顾下面的linger.ms
batch.size=16384
#延迟1ms发送,这项设置将通过增加小的延迟来完成--即,不是立即发送一条记录,producer将会等待给定的延迟时间以允许其他消息记录发送,这些消息记录可以批量处理
linger.ms=1
#producer可以用来缓存数据的内存大小。
buffer.memory=33554432
key.serializer=org.apache.kafka.common.serialization.IntegerSerializer
value.serializer=org.apache.kafka.common.serialization.StringSerializer

remote.properties

ip=
user=
password=
shell=tail -F filepath | grep "ROWKEY"
topic=test

 

你可能感兴趣的:(ToolDevelopment)