Spring Boot and Flink: real-time big-data writes to HDFS

Part 1: Flink
Official docs: https://nightlies.apache.org/flink/flink-docs-release-1.13/zh/docs/connectors/datastream/streamfile_sink/

import org.apache.flink.api.common.serialization.SimpleStringEncoder;
import org.apache.flink.core.fs.Path;
import org.apache.flink.streaming.api.functions.sink.filesystem.StreamingFileSink;
import org.apache.flink.streaming.api.functions.sink.filesystem.bucketassigners.DateTimeBucketAssigner;
import org.apache.flink.streaming.api.functions.sink.filesystem.rollingpolicies.DefaultRollingPolicy;

import java.util.concurrent.TimeUnit;

// Rolling policy: a part file is closed when it reaches 1 GiB, has been
// idle for 5 minutes, or has been open for 15 minutes.
DefaultRollingPolicy<String, String> rollingPolicy = DefaultRollingPolicy.builder()
        .withMaxPartSize(1024 * 1024 * 1024)
        .withInactivityInterval(TimeUnit.MINUTES.toMillis(5))
        .withRolloverInterval(TimeUnit.MINUTES.toMillis(15))
        .build();

// Row-format sink writing to FILE_SAVE_PATH
StreamingFileSink<String> fileSink = StreamingFileSink
        .forRowFormat(new Path(FILE_SAVE_PATH), new SimpleStringEncoder<String>("UTF-8"))
        // One bucket (directory) per day; Hive later reads this directory
        .withBucketAssigner(new DateTimeBucketAssigner<>("yyyyMMdd"))
        .withRollingPolicy(rollingPolicy)
        .build();

// Attach the sink
dataStream.addSink(fileSink).name("file output to hdfs").uid("sink to hdfs")
        .setParallelism(10);
Files saved to HDFS appear in two forms:

1: In-progress part files (hidden, dot-prefixed names containing .inprogress), which are still being written

2: Finished part files (named part-<subtask>-<count>), which are safe to read
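With row format, a part file only moves from in-progress/pending to finished when a checkpoint completes, so checkpointing must be enabled on the job or the files never finish. A minimal sketch (the 60-second interval is an illustrative choice, not from the original post):

import org.apache.flink.streaming.api.CheckpointingMode;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
// Pending part files are renamed to finished when a checkpoint completes.
env.enableCheckpointing(60_000L, CheckpointingMode.EXACTLY_ONCE);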
HiveQL then reads the HDFS files; the bucket directory name (yyyyMMdd) selects one day's data:

create external table if not exists tmp.table_name_${dt} (
	column1 string,
	column2 string,
	column3 string,
	column4 string,
	column5 string,
	column6 string
)
row format delimited fields terminated by '|' -- field delimiter in the text files
stored as textfile -- the sink writes delimited text, so textfile, not orc
location '/user/hive/warehouse/flink/dir_name/${dt}/';
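The ${dt} placeholder is resolved through Hive variable substitution when the DDL is run, either with --hivevar dt=... on the command line or, inside a script, for example:

-- illustrative; defines the hivevar that ${dt} above resolves to
set hivevar:dt=20240101;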

Part 2: Spring Boot
1: First buffer the records to a local file

private void saveFile(List<String> list, String di) {
    try {
        File folder = new File(fileConfig.getLocalPath()); // local directory
        if (!folder.exists()) {
            folder.mkdirs();
        }
        // One file per minute in this example; pick a coarser time interval
        // if you want larger files.
        String localFile = fileConfig.formatLocalPath(di);
        File file = new File(localFile);
        if (!file.exists()) {
            file.createNewFile();
        }
        // try-with-resources so the writer is closed even if a write fails
        try (BufferedWriter bufferedWriter =
                     new BufferedWriter(new FileWriter(file, true))) { // append mode
            for (String data : list) {
                bufferedWriter.write(data);
                bufferedWriter.newLine();
            }
            bufferedWriter.flush();
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}
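The fileConfig bean is not shown in the post. A minimal hypothetical version, assuming file names start with a yyyyMMddHHmm stamp (which the upload-side age check below depends on):

import java.io.File;

// Hypothetical helper, not from the original post.
public class FileConfig {
    private final String localPath; // e.g. /data/flink-upload (assumed)

    public FileConfig(String localPath) {
        this.localPath = localPath;
    }

    public String getLocalPath() {
        return localPath;
    }

    // "di" is assumed to be the yyyyMMddHHmm stamp of the current minute,
    // so the uploader can parse a file's age from its name.
    public String formatLocalPath(String di) {
        return localPath + File.separator + di + ".log";
    }
}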
2: The resulting local files

3: Upload to HDFS

// Run by a scheduled job, e.g. Quartz:
//   .withSchedule(CronScheduleBuilder.cronSchedule("10 */1 * * * ?"))
// Scan the local directory and upload files that are no longer being written.
public void process() {
    try {
        File folder = new File(localPath);
        File[] files = folder.listFiles();
        if (null != files && files.length > 0) {
            for (File file : files) {
                String fileName = file.getName();
                String fileDt = fileName.substring(0, 8); // yyyyMMdd, used as the HDFS directory
                // Upload only files produced more than 2 minutes ago; pass the
                // yyyyMMddHHmm prefix, since that is what checkFileDone parses.
                if (checkFileDone(fileName.substring(0, 12))) {
                    uploadFile(fileDt, file);
                }
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}
// Upload a single file to HDFS, then delete the local copy.
private void uploadFile(String dt, File file) {
    try {
        log.info("uploading file: [{}]", file.getName());
        Configuration configuration = new Configuration();
        // "hive" is a user with write permission on the target path
        FileSystem fs = FileSystem.get(new URI(hdfsPath), configuration, "hive");
        // create the dated directory on demand
        fs.mkdirs(new Path(hdfsPath + dt));
        // delSrc = true: remove the local file once the upload succeeds
        fs.copyFromLocalFile(true, new Path(file.getPath()), new Path(hdfsPath + dt));
        fs.close();
    } catch (Exception e) {
        e.printStackTrace();
    }
}
// A file counts as done when its yyyyMMddHHmm stamp is more than 2 minutes old.
private final static DateTimeFormatter dtf = DateTimeFormatter.ofPattern("yyyyMMddHHmm");

private boolean checkFileDone(String dateStr) {
    try {
        LocalDateTime target = LocalDateTime.parse(dateStr, dtf);
        LocalDateTime current = LocalDateTime.now();
        Duration duration = Duration.between(target, current);
        return duration.toMinutes() > 2;
    } catch (Exception e) {
        e.printStackTrace();
    }
    return false;
}
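The post drives process() from a Quartz trigger (the cron shown in the comment above). Under Spring Boot, a plain @Scheduled method is an alternative; a sketch with assumed class names, which also requires @EnableScheduling on a configuration class:

import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Component;

// Hypothetical wrapper; the original post uses Quartz instead.
@Component
public class UploadJob {

    private final HdfsUploader uploader; // the class holding process(); name assumed

    public UploadJob(HdfsUploader uploader) {
        this.uploader = uploader;
    }

    // Second 10 of every minute, mirroring the Quartz cron "10 */1 * * * ?"
    @Scheduled(cron = "10 * * * * *")
    public void run() {
        uploader.process();
    }
}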
4: Hive reads and writes the uploaded files the same way as in Part 1.
