package com.weshare.bigdata.ods.handler;
import com.alibaba.fastjson.JSONObject;
import com.weshare.bigdata.entity.ClusterEnvirEntity;
import com.weshare.bigdata.facility.ClusterEnvirFacility;
import com.weshare.bigdata.ods.constant.DetailConstant;
import com.weshare.bigdata.ods.utils.DateUtils;
import com.weshare.dataframework.spark.DfSparkSession;
import com.weshare.dataframework.spark.entity.SparkApplication;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordWriter;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.mapred.lib.MultipleTextOutputFormat;
import org.apache.hadoop.util.Progressable;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.common.serialization.StringDeserializer;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.PairFunction;
import org.apache.spark.api.java.function.VoidFunction;
import org.apache.spark.broadcast.Broadcast;
import org.apache.spark.sql.*;
import org.apache.spark.streaming.Durations;
import org.apache.spark.streaming.api.java.JavaInputDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;
import org.apache.spark.streaming.kafka010.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import scala.Tuple2;
import scala.reflect.ClassTag;
import scala.reflect.ClassTag$;
import java.io.IOException;
import java.util.*;
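
/**
 * Spark Streaming job that consumes records from Kafka and classifies them
 * (inferred from the class name and fields: output under {@code /user/admin/FA_OFFLINE/} on HDFS,
 * with time-slice calibration settings read from a Kudu table).
 *
 * <p>This excerpt covers job setup only: the streaming context, cluster environment lookup
 * (Kudu master, Kafka bootstrap servers), and the Kafka consumer configuration.</p>
 */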
public class KafkaDataClassifyBySelf {
    private static final Logger logger = LoggerFactory.getLogger(KafkaDataClassifyBySelf.class);

    static JavaInputDStream<ConsumerRecord<String, String>> directStream;
    static String hdfspath = "/user/admin/FA_OFFLINE/";
    static String classifyTbl;
    static String kuduMaster;
    static String bootstrap;

    public static void main(String[] args) {
        //System.setProperty("hadoop.home.dir","etc/dtconf/bin");
        SparkApplication application = new SparkApplication();
        application.setAppName("DemoStreaming");
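        // Use Kryo serialization (generally faster and more compact than default Java serialization)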
        application.setSerializer("org.apache.spark.serializer.KryoSerializer");
        SparkSession sparkSession = DfSparkSession.bulid(application);
        JavaSparkContext javaSparkContext = new JavaSparkContext(sparkSession.sparkContext());
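        // 600-second (10-minute) micro-batch interval for the streaming context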
        JavaStreamingContext jssc = new JavaStreamingContext(javaSparkContext, Durations.seconds(600));
        // Configure the Kudu master and the time-slice calibration table
        ClusterEnvirFacility clusterEnvirFacility = new ClusterEnvirFacility();
        ClusterEnvirEntity environmentInfo = clusterEnvirFacility.getEnvironmentInfo(sparkSession);
        kuduMaster = environmentInfo.getKuduMaster();
        classifyTbl = "impala::config.bus_calibration_time";
        // Get the Kafka bootstrap server information
        bootstrap = environmentInfo.getBootstrap();
        HashMap<String, Object> kafkaMap = new HashMap<>();
        // Kafka broker listen address(es), host:port
        kafkaMap.put("bootstrap.servers", bootstrap);
        kafkaMap.put("key.deserializer", StringDeserializer.class);
        kafkaMap.put("value.deserializer", StringDeserializer.class);
        // Consumer group ID; any identifier will do
        kafkaMap.put("group.id", "KafkaDataClassifyData");
        // Where to start when no committed offset exists: "earliest" (oldest) or "latest" (newest);
        // the "largest" value used by older clients is not accepted here
        kafkaMap.put("auto.offset.reset", "earliest");
        // If true, the consumer would periodically auto-commit each partition's offset
        kafkaMap.put("enable.auto.commit", "false");
        Map regularTime = KafkaDataClassifyBySelf.getRegularTime(sparkSession, kuduMaster, classifyTbl);
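        // regularTime holds the time-slice calibration values read from config.bus_calibration_time;
        // (assumption) given the Broadcast/ClassTag imports, it is likely broadcast to executors next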
        ClassTag