主要需求:连接远程服务器(HBase写入端),监听HBase写入日志,截取ROWKEY,写入Kafka。
实现方式:通过ch.ethz.ganymed包ganymed-ssh2远程连接linux服务器,执行tail命令,生成实时数据流,写入kafka
主要代码:
pom.xml
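<dependency>
    <groupId>org.apache.kafka</groupId>
    <artifactId>kafka-clients</artifactId>
    <version>0.10.2.0</version>
</dependency>
<dependency>
    <groupId>org.apache.kafka</groupId>
    <artifactId>kafka_2.11</artifactId>
    <version>0.10.2.0</version>
</dependency>
<dependency>
    <groupId>ch.ethz.ganymed</groupId>
    <artifactId>ganymed-ssh2</artifactId>
    <version>build210</version>
</dependency>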
主类:
/**
* Project Name:kafkademo
* File Name:TailLogToKafka.java
* Package Name:cmm.TailLogToKafka
* Date:2018年12月7日下午2:40:46
* Copyright (c) 2018, c2206190880@163.com All Rights Reserved.
*
*/
package cmm.TailLogToKafka;
import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.Properties;
import ch.ethz.ssh2.Connection;
import ch.ethz.ssh2.Session;
import ch.ethz.ssh2.StreamGobbler;
import cmm.util.PathUtil;
/**
 * Streams the stdout of a remote shell command into a Kafka topic.
 *
 * <p>The SSH target ({@code ip}, {@code user}, {@code password}), the command
 * ({@code shell}) and the destination ({@code topic}) are read from
 * {@code remote.properties}, looked up under the directory returned by
 * {@code PathUtil.getProjectParentPath()}. Every line the command prints is
 * echoed to the console and handed to {@code myProducer.produce(topic, line)}.
 *
 * @author mmchen
 * @since JDK 1.7
 */
public class TailLogToKafka {
    /** SSH connection; assigned in {@code main} before the worker thread is started. */
    private static Connection conn;
    /** Remote command to execute; this default is kept when {@code shell} is not configured. */
    private static String cmd = "echo no commond";
    /** Kafka topic to write to; this default is kept when {@code topic} is not configured. */
    private static String topic = "cmmtest";

    /**
     * Loads the configuration, logs in to the remote host and blocks until the
     * remote command's output stream ends.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        Properties properties = new Properties();
        // try-with-resources so the config file handle is released even if load() fails.
        try (InputStream in = new BufferedInputStream(new FileInputStream(
                new File(PathUtil.getProjectParentPath() + "/remote.properties")))) {
            properties.load(in);
        } catch (IOException e1) {
            System.out.println("远程连接配置读取失败!!!");
            e1.printStackTrace();
            // Without the configuration there is no host to connect to.
            return;
        }

        String ip = properties.getProperty("ip");
        String usr = properties.getProperty("user");
        String psword = properties.getProperty("password");
        // Fall back to the field defaults instead of overwriting them with null.
        cmd = properties.getProperty("shell", cmd);
        topic = properties.getProperty("topic", topic);

        if (ip == null || ip.trim().isEmpty()) {
            System.err.println("remote.properties 缺少 ip 配置");
            return;
        }

        // Uses the default SSH port 22; use new Connection(ip, port) for another port.
        conn = new Connection(ip);
        try {
            conn.connect();
            // authenticateWithPassword reports a rejected login through its return
            // value, not through an exception, so the result must be checked.
            if (!conn.authenticateWithPassword(usr, psword)) {
                // The password is deliberately left out of the message.
                System.err.printf("用户%s登录服务器%s失败!", usr, ip);
                conn.close();
                return;
            }
        } catch (IOException e) {
            System.err.printf("用户%s登录服务器%s失败!", usr, ip);
            e.printStackTrace();
            conn.close();
            return;
        }

        // Run the command on a worker thread and wait for it with join(). The
        // wait ends however the worker terminates, including on an exception.
        Thread worker = new Thread(new Runnable() {
            @Override
            public void run() {
                forwardLines();
            }
        });
        worker.start();
        try {
            worker.join();
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
        }
    }

    /**
     * Executes {@code cmd} on the remote host and sends each stdout line to Kafka
     * until the stream ends, then releases the reader, session and connection.
     */
    private static void forwardLines() {
        Session session = null;
        BufferedReader outBufferedReader = null;
        try {
            session = conn.openSession();
            session.execCommand(cmd);
            // Drain stderr in the background so that unread error output cannot
            // stall the remote command; the data itself is not used.
            new StreamGobbler(session.getStderr());
            InputStream out = new StreamGobbler(session.getStdout());
            outBufferedReader = new BufferedReader(new InputStreamReader(out));
            myProducer producerDemo = new myProducer();
            String line;
            while ((line = outBufferedReader.readLine()) != null) {
                System.out.println(line);
                producerDemo.produce(topic, line);
            }
        } catch (IOException e) {
            System.out.println("open session fail");
            e.printStackTrace();
        } finally {
            // Clean up on every exit path, not only on a clean end of stream.
            if (outBufferedReader != null) {
                try {
                    outBufferedReader.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done if closing the reader fails.
                }
            }
            if (session != null) {
                session.close();
            }
            conn.close();
        }
    }
}
Kafka生产者:
/**
* Project Name:kafkademo
* File Name:myProducer.java
* Package Name:cmm.TailLogToKafka
* Date:2018年12月6日下午2:40:28
* Copyright (c) 2018, c2206190880@163.com All Rights Reserved.
*
*/
package cmm.TailLogToKafka;
import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Properties;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;
import cmm.util.PathUtil;
/**
 * Small wrapper around a {@link KafkaProducer} whose settings are loaded from
 * {@code myProducer.properties}, looked up under the directory returned by
 * {@code PathUtil.getProjectParentPath()}.
 *
 * <p>Records are sent without a key. Call {@link #close()} when finished so
 * that records still buffered by the producer are sent before the JVM exits.
 *
 * @author mmchen
 * @since JDK 1.7
 */
public class myProducer {
    // The key type is left open because records never carry a key; values are strings.
    private final KafkaProducer<Object, String> producer;

    /**
     * Creates the producer from {@code myProducer.properties}.
     *
     * @throws IOException if the properties file cannot be opened or read
     */
    public myProducer() throws IOException {
        Properties props = new Properties();
        // try-with-resources so the file handle is released even if load() fails.
        try (InputStream in = new BufferedInputStream(new FileInputStream(
                new File(PathUtil.getProjectParentPath() + "/myProducer.properties")))) {
            props.load(in);
        }
        producer = new KafkaProducer<Object, String>(props);
    }

    /**
     * Asynchronously sends {@code data} as the value of a key-less record.
     *
     * @param topic destination topic
     * @param data record value
     */
    public void produce(String topic, String data) {
        producer.send(new ProducerRecord<Object, String>(topic, data));
    }

    /** Closes the underlying Kafka producer. */
    public void close() {
        producer.close();
    }

    /**
     * Manual smoke test: builds a producer, sends one empty record, then closes.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        myProducer demo = null;
        try {
            demo = new myProducer();
            demo.produce("", "");
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (demo != null) {
                demo.close();
            }
        }
    }
}
properties配置文件:
myProducer.properties
#服务器ip
bootstrap.servers=xxx:9092,1xxx:9092,xxx:9092
#所有follower都响应了才认为消息提交成功,即"committed"
acks=all
#retries:发送失败后的重试次数。此处设为0,表示不重试;若设为MAX则无限重试,直到你意识到出现了问题:)
retries=0
#producer将试图批处理消息记录,以减少请求次数。batch.size是每个批次的字节数上限(默认16384)
#当批量的数据大小达到batch.size设定值后,就会立即发送,不顾下面的linger.ms
batch.size=16384
#延迟1ms发送,这项设置将通过增加小的延迟来完成--即,不是立即发送一条记录,producer将会等待给定的延迟时间以允许其他消息记录发送,这些消息记录可以批量处理
linger.ms=1
#producer可以用来缓存数据的内存大小。
buffer.memory=33554432
key.serializer=org.apache.kafka.common.serialization.IntegerSerializer
value.serializer=org.apache.kafka.common.serialization.StringSerializer
remote.properties
ip=
user=
password=
shell=tail -F filepath | grep "ROWKEY"
topic=test