关于简单的hive练习

现给定一个包含一千条原始数据的txt文件,要求清洗掉多余字符,并以空格分隔字段、以换行分隔记录的格式导入hive中。

  1、导入txt文件

    使用BufferedReader类逐行读取txt文件,准备进行处理。

  2、清洗数据

    使用字符串分割函数split()先将每行数据按逗号(,)分割成各个字段,再对日期字段按 :、+ 以及单词边界(\b)进一步分割,对流量字段按空格分割,从而去掉多余字符。

  3、导出txt文件

    使用FileWriter类将清洗后的数据写出为txt文件,准备进行上传。

  4、上传文件

    将导出清洗完毕的数据文件上传至hdfs中。

  5、导入hive

    从hdfs中将文件导入hive。

 程序如下

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.util.ArrayList;

public class CleanData {

    public static ArrayList ip = new ArrayList();
    public static ArrayList date = new ArrayList();
    public static ArrayList day = new ArrayList();
    public static ArrayList traffic = new ArrayList();
    public static ArrayList type = new ArrayList();
    public static ArrayList id = new ArrayList();

    public static void cleanData() throws IOException {
        String str;
        File f = new File("/home/ryq1998/Documents/Tencent Files/316703799/FileRecv/result.txt");
        BufferedReader bf = new BufferedReader(new FileReader(f));
        try {
            while ((str = bf.readLine()) != null) {
                String[] s = str.split(",");
                ip.add(s[0]);
                String[] newdate = s[1].split("\\\\|\\:|\\b|\\+");
                date.add(newdate[4] + "-" + "11" + "-" + newdate[0] + " " + newdate[6] + ":" + newdate[8] + ":"
                        + newdate[10]);
                day.add(s[2]);
                String[] newtriffic = s[3].split(" ");
                traffic.add(Long.parseLong(newtriffic[0]));
                type.add(s[4]);
                id.add(s[5]);
            }
        } catch (FileNotFoundException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        } finally {
            bf.close();
            returnText(ip, date, day, traffic, type, id);
            /*
             * 将数据插入mysql数据库
             */
            /*addSql(ip, date, day, traffic, type, id);*/
            
        }
    }

    /*
     * 存储mysql数据库
     */
    public static void addSql(ArrayList ip, ArrayList date, ArrayList day,
            ArrayList traffic, ArrayList type, ArrayList id) {

        Connection con = null;
        try {
            con = JdbcUtils.getConnection();
            PreparedStatement psql;
            for (int i = 0; i < ip.size(); i++) {
                psql = con.prepareStatement(
                        "insert into CleanData(ip,date,day,traffic,type,id) " + "values(?,?,?,?,?,?)");
                psql.setString(1, ip.get(i));
                psql.setString(2, date.get(i));
                psql.setString(3, day.get(i));
                psql.setLong(4, traffic.get(i));
                psql.setString(5, type.get(i));
                psql.setString(6, id.get(i));
                psql.executeUpdate();
                psql.close();
            }
            con.close();
        } catch (Exception e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }

    }
    
    public static void returnText(ArrayList ip, ArrayList date, ArrayList day,
            ArrayList traffic, ArrayList type, ArrayList id) {
        
        FileWriter fileWriter = null;
        try {
            fileWriter = new FileWriter("/home/ryq1998/result.txt");//创建文本文件
            int i=0;
            for(;i) {
                if(i==ip.size()-1) {
                    fileWriter.write(ip.get(i)+" "+date.get(i)+" "+day.get(i)+" "+traffic.get(i)+" "+type.get(i)+" "+id.get(i));
                    break;
                }
                fileWriter.write(ip.get(i)+" "+date.get(i)+" "+day.get(i)+" "+traffic.get(i)+" "+type.get(i)+" "+id.get(i)+"\r");//写入 \r\n换行
            }
            fileWriter.flush();
            fileWriter.close();
        } catch (IOException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }
        
        
        
    }

    public static void main(String[] args) throws IOException {
        cleanData();
    }

}
View Code

  截图如下

  关于简单的hive练习_第1张图片

 

 

关于简单的hive练习_第2张图片

 

 

 

 

 

你可能感兴趣的:(关于简单的hive练习)