[root@hdp-3 apps]# java -jar springbt-0.0.1-SNAPSHOT.jar
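If you want the application to survive the shell session ending, one option (not part of the original walkthrough) is to start it in the background and redirect its output:
[root@hdp-3 apps]# nohup java -jar springbt-0.0.1-SNAPSHOT.jar > springbt.log 2>&1 &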
Edit /usr/local/nginx/conf/nginx.conf and add the following inside the http block (the upstream goes alongside the server block):
upstream frame-tomcat {
    server hdp-3:8088;   # host and port where the jar is running
}
server {
    listen       80;
    server_name  hdp-1;  # the hostname typed into the browser

    #charset koi8-r;
    access_log  logs/log.frame.access.log  main;

    location / {
        # root   html;
        # index  index.html index.htm;
        proxy_pass http://frame-tomcat;
    }

    error_page  500 502 503 504  /50x.html;
    location = /50x.html {
        root  html;
    }
}
Start Nginx:
[root@hdp-1 sbin]# ./nginx
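It is worth validating the configuration first; these are standard Nginx flags:
[root@hdp-1 sbin]# ./nginx -t          # check the configuration syntax
[root@hdp-1 sbin]# ./nginx -s reload   # reload if Nginx is already running
A quick curl http://hdp-1/ should now return the Spring Boot app's response and append a line to logs/log.frame.access.log.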
[root@hdp-1 bin]# sh start-allkafka.sh   # script that starts Kafka on every node of the cluster
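The Flume sink below writes to the topic xin, so the topic should exist first. A minimal sketch using Kafka's bundled script (partition and replication counts here are assumptions; on Kafka 2.2+ use --bootstrap-server instead of --zookeeper):
[root@hdp-1 bin]# ./kafka-topics.sh --create --zookeeper hdp-1:2181 --replication-factor 1 --partitions 1 --topic xin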
Flume must run on the same machine as Nginx, since it tails the local access log. Create an agent configuration file, e.g. flume_kafka.conf:
# Name the agent's core components
ag1.sources = r1
ag1.sinks = k1
ag1.channels = c1

# Source: tail the Nginx access log
ag1.sources.r1.type = exec
ag1.sources.r1.command = tail -F /usr/local/nginx/logs/log.frame.access.log

# Sink: publish each log line to Kafka
ag1.sinks.k1.type = org.apache.flume.sink.kafka.KafkaSink
ag1.sinks.k1.kafka.topic = xin
ag1.sinks.k1.kafka.bootstrap.servers = hdp-1:9092,hdp-2:9092,hdp-3:9092

# Channel: buffer events in memory
ag1.channels.c1.type = memory
ag1.channels.c1.capacity = 20000
ag1.channels.c1.transactionCapacity = 10000

# Bind the source and sink to the channel
ag1.sources.r1.channels = c1
ag1.sinks.k1.channel = c1
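Two optional KafkaSink settings worth knowing about (the values below are illustrative, not from the original setup, and require Flume 1.7+): kafka.flumeBatchSize controls how many events are batched into one Kafka request, and kafka.producer.acks trades throughput for durability:
ag1.sinks.k1.kafka.flumeBatchSize = 100
ag1.sinks.k1.kafka.producer.acks = 1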
Start Flume:
[root@hdp-1 bin]# ./flume-ng agent -c ../conf/ -f ../flume_kafka.conf -n ag1 -Dflume.root.logger=INFO,console
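To confirm that log lines are reaching Kafka before wiring up the Java consumer, the bundled console consumer can be used (run from Kafka's bin directory):
[root@hdp-1 bin]# ./kafka-console-consumer.sh --bootstrap-server hdp-1:9092 --topic xin --from-beginning
Hitting the proxied site in a browser should print new access-log lines here within a few seconds.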
Approach 1: the consumer pulls string data out of ConsumerRecords and writes it to HDFS as a stream.
Drawback: to make sure every single click is persisted, each record goes into its own file, named with a timestamp plus a random number; this produces a large number of small files on HDFS (a possible mitigation is sketched after HDFSWriter.java below).
Consumer.java
package csdn;
import java.util.Collections;
import java.util.Properties;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
public class Consumer {
    private static KafkaConsumer<String, String> consumer;
    private static Properties props;

    static {
        props = new Properties();
        // Kafka broker address
        props.put("bootstrap.servers", "hdp-2:9092");
        // legacy old-consumer property; the new consumer API ignores it
        props.put("zookeeper.connect", "hdp-1:2181,hdp-2:2181,hdp-3:2181");
        // key and value deserializers
        props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        // consumer group id
        props.put("group.id", "wang");
    }

    private static void consumeMessage() {
        HDFSWriter hdfsWriter = new HDFSWriter();
        // enable automatic offset commits
        props.put("enable.auto.commit", "true");
        consumer = new KafkaConsumer<>(props);
        consumer.subscribe(Collections.singleton("xin"));
        // Poll in a loop. Kafka removes messages only after the configured retention
        // period, so an already-consumed message can be read again via its offset.
        try {
            while (true) {
                // poll() fetches the next batch of records (timeout in milliseconds)
                ConsumerRecords<String, String> records = consumer.poll(100);
                for (ConsumerRecord<String, String> r : records) {
                    System.out.printf("topic = %s, offset = %s, key = %s, value = %s%n",
                            r.topic(), r.offset(), r.key(), r.value());
                    hdfsWriter.writer(r.toString());
                }
            }
        } finally {
            consumer.close();
        }
    }

    public static void main(String[] args) {
        consumeMessage();
    }
}
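To compile these two classes, the project needs the Kafka client and the Hadoop client on the classpath. A minimal Maven sketch (the version numbers are assumptions; match them to your cluster):
<dependency>
    <groupId>org.apache.kafka</groupId>
    <artifactId>kafka-clients</artifactId>
    <version>1.0.0</version>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-client</artifactId>
    <version>2.8.5</version>
</dependency>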
HDFSWriter.java
package csdn;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import java.io.BufferedInputStream;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.net.URISyntaxException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Random;
public class HDFSWriter {
    public void writer(String str) {
        try {
            // wrap the string in a buffered byte input stream
            InputStream inputStream = new BufferedInputStream(
                    new ByteArrayInputStream(str.getBytes()));
            URI uri = null;
            FileSystem fs = null;
            try {
                uri = new URI("hdfs://hdp-1:9000");
                Configuration conf = new Configuration();
                conf.set("dfs.blocksize", "64m");
                conf.set("dfs.replication", "1");
                String user = "root";
                fs = FileSystem.get(uri, conf, user);
            } catch (Exception e) {
                e.printStackTrace();
            }
            // file name: timestamp plus a two-digit random suffix, so every
            // record lands in its own file
            Date date = new Date();
            String strDateFormat = "yyyy-MM-dd-HH-mm-ss";
            SimpleDateFormat sdf = new SimpleDateFormat(strDateFormat);
            Random random = new Random();
            int end = random.nextInt(99);
            final String ends = String.format("%02d", end); // left-pad with 0 to two digits
            String name = sdf.format(date) + ends;
            final Path path = new Path("/kafka/" + name + ".log");
            // create() makes a new file; to append to an existing file use append() instead
            FSDataOutputStream fsDataOutputStream = fs.create(path);
            // alternative: write str directly
            // OutputStreamWriter out = new OutputStreamWriter(fsDataOutputStream);
            // out.write(str);
            // copy as a stream; the final 'true' closes both streams when done
            IOUtils.copyBytes(inputStream, fsDataOutputStream, 1024, true);
            fs.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
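One way to soften the small-files drawback noted above is to append all records from the same hour to a single file. The class below is a sketch I am adding, not part of the original code: the name HDFSAppendWriter is hypothetical, and it assumes append is enabled on the HDFS cluster.
HDFSAppendWriter.java
package csdn;
import java.io.IOException;
import java.net.URI;
import java.nio.charset.StandardCharsets;
import java.text.SimpleDateFormat;
import java.util.Date;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
public class HDFSAppendWriter {
    private final FileSystem fs;

    public HDFSAppendWriter() throws Exception {
        Configuration conf = new Configuration();
        conf.set("dfs.replication", "1");
        // append must be supported/enabled on the HDFS cluster (assumption)
        conf.setBoolean("dfs.support.append", true);
        fs = FileSystem.get(new URI("hdfs://hdp-1:9000"), conf, "root");
    }

    // append one log line to the current hour's file instead of creating
    // a new file per record
    public synchronized void write(String line) throws IOException {
        String hour = new SimpleDateFormat("yyyy-MM-dd-HH").format(new Date());
        Path path = new Path("/kafka/" + hour + ".log");
        FSDataOutputStream out = fs.exists(path) ? fs.append(path) : fs.create(path);
        try {
            out.write((line + "\n").getBytes(StandardCharsets.UTF_8));
        } finally {
            out.close();
        }
    }
}
Opening and closing the stream for every record is still not free; keeping the stream open and flushing periodically would be the next step, at the cost of handling failures yourself.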