sqoop导入和导出数据脚本

sqoop导入和导出数据脚本

一、将mysql数据导入到Hive

sqoop import \
--connect jdbc:mysql://:3306/bd?characterEncoding=utf-8 \
--username root \
--password '' \
--table tongji_1 \
--delete-target-dir  \
--hive-import \
--hive-overwrite \
--null-string '\\N' \
--null-non-string '\\N' \
--hive-table tongji \
--hive-drop-import-delims \
--fields-terminated-by '|' \
-m 1

二、Hive导出数据到mysql

sqoop export \
--connect jdbc:mysql://:3306/bd \
--username root \
--password ''\
--table tongji_1 \
--export-dir hdfs://:8020/user/hive/warehouse/tongji/000000_0 \
--input-fields-terminated-by '|' \
-m 1 \

三、hdfs数据加入到Hive中

#!/bin/bash
datenow=`date --date='0 days ago' "+%Y-%m-%d"`
echo ${datenow}
#将数据从hdfs导入到hive
hive -e "ALTER TABLE flux ADD  PARTITION(reportTime='${datenow}')location '/flow/reportTime=${datenow}';"
#将数据导入到数据清洗表
hive -e "insert into table dataclear
        select reportTime,url,urlname,stat_uv,split(stat_ss,'_')[0],split(stat_ss,'_')[1],split(stat_ss,'_')[2],cip 
from flux 
        WHERE reportTime='${datenow}';"
#对数据做统计
hive -e "insert into table tongji  
	select '${datenow}',tab1.pv,tab2.uv,tab3.vv,tab4.br,tab5.newip,tab6.newcust,tab7.avgtime,tab8.avgdeep 
from 
	(select count(*) as pv from dataclear where reportTime = '${datenow}') as tab1,
	(select count(distinct uvid) as uv from dataclear where reportTime='${datenow}') as tab2,
	(select count(distinct ssid) as vv from dataclear where reportTime='${datenow}') as tab3,
	(select round(br_a_tab.br_count/br_b_tab.vv_count,4) as br from (select count(*) as br_count from (select ssid from dataclear where reportTime = '${datenow}' group by ssid having count(*) = 1) as br_tab) as br_a_tab,(select count(distinct ssid) as vv_count from dataclear where reportTime='${datenow}') as br_b_tab) as tab4,
	(select count(distinct dataclear.cip) as newip from dataclear where reportTime='${datenow}' and dataclear.cip not in (select inner_dataclear.cip from dataclear as inner_dataclear where datediff(inner_dataclear.reportTime,'${datenow}')<0)) as tab5,
	(select count(distinct dataclear.uvid) as newcust from dataclear where reportTime='${datenow}' and dataclear.uvid not in (select inner_dataclear.uvid from dataclear as inner_dataclear where datediff(inner_dataclear.reportTime,'${datenow}')<0)) as tab6,
	(select round(avg(usetime),4) as avgtime from (select max(sstime) - min(sstime) as usetime from dataclear where reportTime='${datenow}' group by ssid) as avgtime_tab) as tab7,
	(select round(avg(deep),2) as avgdeep from (select count(distinct urlname) as deep from dataclear where reportTime='${datenow}' group by ssid) as avgdeep_tab) as tab8;"

你可能感兴趣的:(大数据,sqoop)