Hive表根据时间创建分区

(1)时间工具函数(DirectoryAndBrands.java)

/**
 * Returns the date one day before the given date.
 *
 * <p>Despite the name, this method steps backwards: it subtracts one day from
 * {@code date} using a {@link Calendar} obtained from {@code Calendar.getInstance()}.
 *
 * @param date the reference date; the passed-in object is not mutated
 * @return a new {@code Date} one day earlier than {@code date}
 */
public static Date getNextDay(Date date) {
    final int daysBack = 1;
    final Calendar cal = Calendar.getInstance();
    cal.setTime(date);
    // DATE is the same calendar field as DAY_OF_MONTH.
    cal.add(Calendar.DATE, -daysBack);
    return cal.getTime();
}
/**
 * Returns a copy of the given date, shifted by zero days.
 *
 * <p>The value is routed through a {@link Calendar} with a zero-day offset, so the
 * result represents the same instant as {@code date}.
 *
 * @param date the reference date; the passed-in object is not mutated
 * @return a new {@code Date} equal to {@code date}
 */
public static Date getToday(Date date) {
    final Calendar cal = Calendar.getInstance();
    cal.setTimeInMillis(date.getTime());
    // Zero offset: kept so this helper mirrors its sibling date helpers.
    cal.add(Calendar.DATE, 0);
    return cal.getTime();
}
/**
 * Returns the date three days before the given date.
 *
 * @param date the reference date; the passed-in object is not mutated
 * @return a new {@code Date} three days earlier than {@code date}
 */
public static Date getThreeDayAgo(Date date) {
    final int daysBack = 3;
    final Calendar cal = Calendar.getInstance();
    cal.setTime(date);
    // DATE is the same calendar field as DAY_OF_MONTH.
    cal.add(Calendar.DATE, -daysBack);
    return cal.getTime();
}



(2)根据时间创建 Hive 分区表

// Writes the (cc, dd) pairs held in `aa` into today's partition of xxx.table and
// drops the partition from three days ago. `jsc` (the JavaSparkContext) and `aa`
// (the pair data) are defined outside this snippet.

// Partition values are rendered as yyyyMMdd strings.
SimpleDateFormat df = new SimpleDateFormat("yyyyMMdd");

// Capture "now" once so every derived date is based on the same instant; calling
// new Date() separately for each value could yield inconsistent partition keys
// if the job runs across midnight.
Date now = new Date();
// getNextDay subtracts one day, so this is yesterday; it is not used in this snippet.
String time = df.format(DirectoryAndBrands.getNextDay(now));
String today = df.format(DirectoryAndBrands.getToday(now));
String threeDayAgo = df.format(DirectoryAndBrands.getThreeDayAgo(now));

HiveContext hiveCtx = new HiveContext(jsc);
hiveCtx.sql("use xxx");

// HiveContext is a SQLContext. Reusing it guarantees that the temp table below is
// registered in the same context that executes the INSERT, instead of relying on
// two separately constructed contexts sharing their temp-table catalog.
SQLContext sqlCtx = hiveCtx;

// Convert each (cc, dd) pair into a two-column Row.
JavaRDD<Row> bbRDD = aa.map(new Function<Tuple2<String, String>, Row>() {
    @Override
    public Row call(Tuple2<String, String> pair) throws Exception {
        String cc = pair._1();
        String dd = pair._2();
        return RowFactory.create(cc, dd);
    }
});

// Schema: two nullable string columns, matching the Row layout above.
StructType structType = new StructType()
        .add("cc", DataTypes.StringType, true)
        .add("dd", DataTypes.StringType, true);
Dataset<Row> categoryDF = sqlCtx.createDataFrame(bbRDD, structType);

// Expose the data to SQL under the name "temp".
categoryDF.registerTempTable("temp");

// Create the partitioned table if it does not exist yet.
hiveCtx.sql("CREATE TABLE IF NOT EXISTS xxx.table (cc STRING, dd STRING) partitioned by (ds string) row format delimited fields terminated by ',' COLLECTION ITEMS TERMINATED BY ',' stored as textfile");

// Drop the partition from three days ago.
String dropSql = "ALTER TABLE xxx.table DROP IF EXISTS PARTITION (ds="+threeDayAgo+")";
hiveCtx.sql(dropSql);

// Drop today's partition too if it already exists; it is re-created and refilled below.
String dropTodaySql = "ALTER TABLE xxx.table DROP IF EXISTS PARTITION (ds="+today+")";
hiveCtx.sql(dropTodaySql);

// Add today's partition.
String addSql = "ALTER TABLE xxx.table ADD PARTITION (ds="+today+")";
hiveCtx.sql(addSql);

// Load the temp table's rows into today's partition.
String insertSql = "insert into xxx.table PARTITION (ds="+today+") select cc,dd from temp";
hiveCtx.sql(insertSql);

你可能感兴趣的:(RDD,SPARK)