Hive使用脚本加载数据

方式一:将 HiveQL 语句直接写在 Shell 脚本中(通过 hive -e 执行)

load_track_logs.sh:

#!/bin/bash
#
# load_track_logs.sh - load yesterday's track log files into Hive.
#
# Every entry found in ${LOG_DIR}/<yyyymmdd> (yesterday's date) is loaded into
# db_track.yhd_log_load with one `hive -e` call per entry. The partition values
# are cut from the file name: characters 1-8 become `date` and characters 9-10
# become `hour` (a name starting with 2016111819 gives date=20161118, hour=19).
#
# Requires bash (substring expansion and [[ ]] are not POSIX sh) and GNU date
# (for the -d option).
#
# Exit status: 0 when every load succeeded; 1 when the day directory is
# missing or at least one hive call failed.

# Source the system profile so its environment variables take effect.
. /etc/profile

# Hive installation directory.
HIVE_HOME=/opt/cdh-5.3.6/hive-0.13.1-cdh5.3.6

# Root directory of the track logs; yesterday's files are in ${LOG_DIR}/<yyyymmdd>.
LOG_DIR=/datas/tracklogs

# Yesterday's date, formatted as yyyymmdd.
yesterday=$(date -d '-1 day' '+%Y%m%d')
day_dir="${LOG_DIR}/${yesterday}"

# Stop early with a clear message instead of looping over nothing.
if [[ ! -d "${day_dir}" ]]; then
  echo "log directory ${day_dir} does not exist" >&2
  exit 1
fi

failed=0

# Glob instead of parsing `ls`, so names with spaces stay in one piece.
for log_path in "${day_dir}"/*; do
  # An empty directory leaves the pattern unexpanded; skip that literal.
  [[ -e "${log_path}" ]] || continue
  line=${log_path##*/}

  # The partition values come from the first 10 characters of the name, so
  # anything that does not start with yyyymmddHH cannot be mapped to a partition.
  if [[ ! "${line}" =~ ^[0-9]{10} ]]; then
    echo "skipping ${line}: name does not start with yyyymmddHH" >&2
    continue
  fi

  echo "loading ${line} to db_track.yhd_log_load"
  date=${line:0:8}
  hour=${line:8:2}

  # Keep going after a failed load so the remaining files are still loaded,
  # but remember the failure for the exit status.
  if ! "${HIVE_HOME}/bin/hive" -e "load data local inpath '${log_path}' into table db_track.yhd_log_load partition (date = '${date}', hour = '${hour}') ;"; then
    echo "failed to load ${log_path}" >&2
    failed=1
  fi
done

exit "${failed}"

方式二:Shell 脚本调用 SQL 文件(通过 hive -f 执行,参数用 --hiveconf 传入)

load_track_logs.sh:

#!/bin/bash
#
# load_track_logs.sh - load yesterday's track log files into Hive by running
# a SQL file.
#
# Every entry found in ${LOG_DIR}/<yyyymmdd> (yesterday's date) is loaded with
# one `hive -f ${SCRIPT_DIR}/load_data.sql` call per entry. The file path and
# the partition values are handed to the SQL file as --hiveconf settings:
#   LOAD_FILE_PARAM  full local path of the file
#   DATE_PARAM       characters 1-8 of the file name
#   HOUR_PARAM       characters 9-10 of the file name
# (a name starting with 2016111819 gives DATE_PARAM=20161118, HOUR_PARAM=19).
#
# Requires bash (substring expansion and [[ ]] are not POSIX sh) and GNU date
# (for the -d option).
#
# Exit status: 0 when every load succeeded; 1 when the day directory or the
# SQL file is missing, or at least one hive call failed.

# Source the system profile so its environment variables take effect.
. /etc/profile

# Hive installation directory.
HIVE_HOME=/opt/cdh-5.3.6/hive-0.13.1-cdh5.3.6

# Root directory of the track logs; yesterday's files are in ${LOG_DIR}/<yyyymmdd>.
LOG_DIR=/datas/tracklogs

# Directory that holds load_data.sql.
SCRIPT_DIR=/home/beifeng

sql_file="${SCRIPT_DIR}/load_data.sql"

# Yesterday's date, formatted as yyyymmdd.
yesterday=$(date -d '-1 day' '+%Y%m%d')
day_dir="${LOG_DIR}/${yesterday}"

# Stop early with a clear message instead of looping over nothing.
if [[ ! -d "${day_dir}" ]]; then
  echo "log directory ${day_dir} does not exist" >&2
  exit 1
fi

# Without the SQL file every hive call below would fail the same way.
if [[ ! -f "${sql_file}" ]]; then
  echo "sql file ${sql_file} does not exist" >&2
  exit 1
fi

failed=0

# Glob instead of parsing `ls`, so names with spaces stay in one piece.
for load_file in "${day_dir}"/*; do
  # An empty directory leaves the pattern unexpanded; skip that literal.
  [[ -e "${load_file}" ]] || continue
  line=${load_file##*/}

  # The partition values come from the first 10 characters of the name, so
  # anything that does not start with yyyymmddHH cannot be mapped to a partition.
  if [[ ! "${line}" =~ ^[0-9]{10} ]]; then
    echo "skipping ${line}: name does not start with yyyymmddHH" >&2
    continue
  fi

  echo "loading ${line} to db_track.yhd_log_load"
  date=${line:0:8}
  hour=${line:8:2}

  # Each key=value pair is quoted so it reaches hive as a single argument.
  # Keep going after a failed load so the remaining files are still loaded,
  # but remember the failure for the exit status.
  if ! "${HIVE_HOME}/bin/hive" \
      --hiveconf "LOAD_FILE_PARAM=${load_file}" \
      --hiveconf "DATE_PARAM=${date}" \
      --hiveconf "HOUR_PARAM=${hour}" \
      -f "${sql_file}"; then
    echo "failed to load ${load_file}" >&2
    failed=1
  fi
done

exit "${failed}"

load_data.sql:


-- Loads one local file into a single partition of db_track.yhd_log_load.
-- The three ${hiveconf:...} references are read from the hiveconf namespace:
--   LOAD_FILE_PARAM  local path of the file to load
--   DATE_PARAM       value for the `date` partition column
--   HOUR_PARAM       value for the `hour` partition column
LOAD DATA LOCAL INPATH '${hiveconf:LOAD_FILE_PARAM}'
INTO TABLE db_track.yhd_log_load
PARTITION (date = '${hiveconf:DATE_PARAM}', hour = '${hiveconf:HOUR_PARAM}');

你可能感兴趣的:(笔记,大数据学习)