Notes on the daily batch runs of the big data platform data warehouse

1. Directory tree layout
/home/hs/opt/dw-etl
│  date.list
│  dw_batch.sh
│  sor-all.sh
│  sor-rds.sh
│  ssa-all.sh

├─bigtab_int
│      init-99dianzhangitem.sh
│      init-99dingdan.sh
│      init-99dingdanfood.sh
│      ssa-increment_data_int_except3tab.sh

├─data
├─etl-script
│  ├─dpa-hive2mysql
│  │  │  date.list
│  │  │  dpa-hive2mysql1.sh
│  │  │  dpa-hive2mysql2.sh
│  │  │
│  │  └─data-dpa
│  ├─sor
│  │      TMD_ORGANIZATION_INFO.sql
│  │      TSF_ACCOUNT_INFO.sql
│  │      TSF_AUTH_CONTACTS_INFO.sql
│  │      TSF_AUTH_SETUPANDPOSITION_INFO.sql
│  │
│  ├─sor-pyproc
│  │      hezi_item.py
│  │
│  ├─ssa-hive2mysql
│  │  │  date.list
│  │  │  ssa-hive2mysql.sh
│  │  │  ssa-hive2mysql_increment_int.sh
│  │  │  ssa-hive_data_rollback.hql
│  │  │  ssa_data_current_all_proc.hql
│  │  │
│  │  └─data-ssa
│  ├─ssa-increment
│  │  ├─db99jifen
│  │  │      src2ssa_c_credit.sh
│  │  │      src2ssa_c_lottery_ticket.sh
│  │  │
│  │  ├─db99jinrong
│  │  │      src2ssa_f_auth_call.sh
│  │  │      src2ssa_f_auth_contacts.sh
│  │  │
│  │  ├─db99wejuan
│  │  │      src2ssa_wejuan_questionnaire_record.sh
│  │  │
│  │  └─db99huanx
│  │          src2ssa_99_coupon.sh
│  │          src2ssa_99_dianzhangbestpurchaseitem.sh
│  │
│  ├─ssa-increment-int
│  │  ├─db99jifen
│  │  │      src2ssa_c_credit.sh
│  │  │
│  │  ├─db99jinrong
│  │  │      src2ssa_f_auth_call.sh
│  │  │
│  │  ├─db99wejuan
│  │  │      src2ssa_wejuan_questionnaire_record.sh
│  │  │
│  │  └─db99huanx
│  │          src2ssa_99_coupon.sh
│  │          src2ssa_99_dianzhangbestpurchaseitem.sh
│  │
│  └─ssa-total
│      ├─db99hezi
│      │      src2ssa_99_dianzhang.sh
│      │      src2ssa_b_hezi.sh
│      │
│      ├─db99jifen
│      │      src2ssa_c_coupon_template.sh
│      │
│      ├─db99drink
│      │      src2ssa_d_dingdan.sh
│      │      src2ssa_d_dingdan_item.sh
│      │
│      ├─db99jinrong
│      │      src2ssa_dict.sh
│      │      src2ssa_dict_type.sh
│      │      src2ssa_f_account.sh
│      │
│      ├─db99wejuan
│      │      src2ssa_wejuan_questionnaire.sh
│      │      src2ssa_wejuan_questionnaire_base_record.sh
│      │
│      ├─db99print
│      │      src2ssa_p_dingdan.sh
│      │      src2ssa_p_dingdan_detail.sh
│      │
│      ├─db99huanx
│      │      src2ssa_99_city.sh
│      │      src2ssa_99_cityitem.sh
│      │
│      └─db99team
│              src2ssa_99_dianzhangteam.sh

└─script_generate
    │  exec_hivessa_create_tab.sh
    │  exec_rdsssa_create_tab.sh
    │  rds-increment_tab_should_create_idx_sql.sh
    │  ssa-create_hive2mysql_increment_int_script.sh
    │  ssa-create_hive2mysql_script.sh
    │  ssa-create_hivetab_script.sh
    │  ssa-create_rdstab_script1.sh
    │  ssa-create_rdstab_script2.sh
    │  ssa-create_src2ssa_increment_int_script.sh
    │  ssa-create_src2ssa_increment_script.sh
    │  ssa-create_src2ssa_total_script.sh
    │  ssa-hive_increment_data_rollback.sh
    │  ssa-increment_data_create_current_all_script.sh
    │  table_create.list
    │  table_create_increment.list
    │  table_create_total.list
    │
    └─script_dir
        │  rds-increment_tab_should_create_idx.sql
        │  second_day_ssa_data_rollback.hql
        │  ssa-hive_increment_data_rollback.hql
        │
        ├─hive-ssa_create_tab_script
        │      99_dianzhang.hql
        │
        └─rds-ssa_create_tab_script
                99_dianzhang.sql
               
2. The master control script scheduled daily (the crontab entry below fires it at 05:09 every morning)
[hs@master ~]$ crontab -l
9 5 * * * sh /home/hs/opt/dw-etl/dw_batch.sh

cat /home/hs/opt/dw-etl/dw_batch.sh
#!/bin/bash
export yesterday=`date -d last-day +%Y-%m-%d`
#echo -n "please enter a day for running :"
#read yesterday

export ytd=${yesterday//-/}

#src data download and load in ssa
echo `date +"%Y-%m-%d %H:%M:%S"` >>/home/hs/opt/dw-etl/dw_batch.log
echo "$yesterday,[sh /home/hs/opt/dw-etl/ssa-all.sh] exec start ... " >>/home/hs/opt/dw-etl/dw_batch.log
sh /home/hs/opt/dw-etl/ssa-all.sh $ytd
echo `date +"%Y-%m-%d %H:%M:%S"` >>/home/hs/opt/dw-etl/dw_batch.log
echo "$yesterday,[sh /home/hs/opt/dw-etl/ssa-all.sh] exec finished !" >>/home/hs/opt/dw-etl/dw_batch.log

#NOTE: the block below is intentionally disabled (wrapped in 'if false') and kept only for reference
if false;then
#generate ssa current all data
echo `date +"%Y-%m-%d %H:%M:%S"` >>/home/hs/opt/dw-etl/dw_batch.log
echo "$yesterday,[/home/hs/opt/hive-1.2.1/bin/hive -f /home/hs/opt/dw-etl/etl-script/ssa-hive2mysql/ssa_data_current_all_proc.hql] exec start ... " >>/home/hs/opt/dw-etl/dw_batch.log
/home/hs/opt/hive-1.2.1/bin/hive -f /home/hs/opt/dw-etl/etl-script/ssa-hive2mysql/ssa_data_current_all_proc.hql
echo `date +"%Y-%m-%d %H:%M:%S"` >>/home/hs/opt/dw-etl/dw_batch.log
echo "$yesterday,[/home/hs/opt/hive-1.2.1/bin/hive -f /home/hs/opt/dw-etl/etl-script/ssa-hive2mysql/ssa_data_current_all_proc.hql] exec finished !" >>/home/hs/opt/dw-etl/dw_batch.log
fi

#put ssa ca data on dw-rds
echo `date +"%Y-%m-%d %H:%M:%S"` >>/home/hs/opt/dw-etl/dw_batch.log
echo "$yesterday,[sh /home/hs/opt/dw-etl/etl-script/ssa-hive2mysql/ssa-hive2mysql.sh] exec start ... " >>/home/hs/opt/dw-etl/dw_batch.log
sh /home/hs/opt/dw-etl/etl-script/ssa-hive2mysql/ssa-hive2mysql.sh $yesterday
echo `date +"%Y-%m-%d %H:%M:%S"` >>/home/hs/opt/dw-etl/dw_batch.log
echo "$yesterday,[sh /home/hs/opt/dw-etl/etl-script/ssa-hive2mysql/ssa-hive2mysql.sh] exec finished !" >>/home/hs/opt/dw-etl/dw_batch.log

#run sor data on dw-rds
echo `date +"%Y-%m-%d %H:%M:%S"` >>/home/hs/opt/dw-etl/dw_batch.log
echo "$yesterday,[sh /home/hs/opt/dw-etl/sor-rds.sh] exec start ... " >>/home/hs/opt/dw-etl/dw_batch.log
sh /home/hs/opt/dw-etl/sor-rds.sh $ytd
echo `date +"%Y-%m-%d %H:%M:%S"` >>/home/hs/opt/dw-etl/dw_batch.log
echo "$yesterday,[sh /home/hs/opt/dw-etl/sor-rds.sh] exec finished !" >>/home/hs/opt/dw-etl/dw_batch.log

manual_dw_batch.sh is the manual variant of the same flow: it prompts for the run date instead of deriving yesterday's date.
cat manual_dw_batch.sh
#!/bin/bash
#export yesterday=`date -d last-day +%Y-%m-%d`
echo -n "please enter a day for running :"
read yesterday

export ytd=${yesterday//-/}

#src data download and load in ssa
echo `date +"%Y-%m-%d %H:%M:%S"` >>/home/hs/opt/dw-etl/dw_batch.log
echo "$yesterday,[sh /home/hs/opt/dw-etl/ssa-all.sh] exec start ... " >>/home/hs/opt/dw-etl/dw_batch.log
sh /home/hs/opt/dw-etl/ssa-all.sh $ytd
echo `date +"%Y-%m-%d %H:%M:%S"` >>/home/hs/opt/dw-etl/dw_batch.log
echo "$yesterday,[sh /home/hs/opt/dw-etl/ssa-all.sh] exec finished !" >>/home/hs/opt/dw-etl/dw_batch.log

if false;then
#generate ssa current all data
echo `date +"%Y-%m-%d %H:%M:%S"` >>/home/hs/opt/dw-etl/dw_batch.log
echo "$yesterday,[/home/hs/opt/hive-1.2.1/bin/hive -f /home/hs/opt/dw-etl/etl-script/ssa-hive2mysql/ssa_data_current_all_proc.hql] exec start ... " >>/home/hs/opt/dw-etl/dw_batch.log
/home/hs/opt/hive-1.2.1/bin/hive -f /home/hs/opt/dw-etl/etl-script/ssa-hive2mysql/ssa_data_current_all_proc.hql
echo `date +"%Y-%m-%d %H:%M:%S"` >>/home/hs/opt/dw-etl/dw_batch.log
echo "$yesterday,[/home/hs/opt/hive-1.2.1/bin/hive -f /home/hs/opt/dw-etl/etl-script/ssa-hive2mysql/ssa_data_current_all_proc.hql] exec finished !" >>/home/hs/opt/dw-etl/dw_batch.log
fi

#put ssa ca data on dw-rds
echo `date +"%Y-%m-%d %H:%M:%S"` >>/home/hs/opt/dw-etl/dw_batch.log
echo "$yesterday,[sh /home/hs/opt/dw-etl/etl-script/ssa-hive2mysql/ssa-hive2mysql.sh] exec start ... " >>/home/hs/opt/dw-etl/dw_batch.log
sh /home/hs/opt/dw-etl/etl-script/ssa-hive2mysql/ssa-hive2mysql.sh $yesterday
echo `date +"%Y-%m-%d %H:%M:%S"` >>/home/hs/opt/dw-etl/dw_batch.log
echo "$yesterday,[sh /home/hs/opt/dw-etl/etl-script/ssa-hive2mysql/ssa-hive2mysql.sh] exec finished !" >>/home/hs/opt/dw-etl/dw_batch.log

#run sor data on dw-rds
echo `date +"%Y-%m-%d %H:%M:%S"` >>/home/hs/opt/dw-etl/dw_batch.log
echo "$yesterday,[sh /home/hs/opt/dw-etl/sor-rds.sh] exec start ... " >>/home/hs/opt/dw-etl/dw_batch.log
sh /home/hs/opt/dw-etl/sor-rds.sh $ytd
echo `date +"%Y-%m-%d %H:%M:%S"` >>/home/hs/opt/dw-etl/dw_batch.log
echo "$yesterday,[sh /home/hs/opt/dw-etl/sor-rds.sh] exec finished !" >>/home/hs/opt/dw-etl/dw_batch.log
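
Both dw_batch.sh and manual_dw_batch.sh wrap every step in the same four logging lines (timestamp, "exec start", timestamp, "exec finished"). If these scripts are ever reworked, that pattern could be pulled into a small wrapper; a minimal sketch, where the run_step helper and the reuse of dw_batch.log are only illustrative and not part of the current scripts:

#!/bin/bash
LOG=/home/hs/opt/dw-etl/dw_batch.log
export yesterday=`date -d last-day +%Y-%m-%d`
export ytd=${yesterday//-/}

#run one step and surround it with the same start/finish log lines used above
run_step() {
  echo `date +"%Y-%m-%d %H:%M:%S"` >>$LOG
  echo "$yesterday,[$*] exec start ... " >>$LOG
  "$@"
  echo `date +"%Y-%m-%d %H:%M:%S"` >>$LOG
  echo "$yesterday,[$*] exec finished !" >>$LOG
}

run_step sh /home/hs/opt/dw-etl/ssa-all.sh $ytd
run_step sh /home/hs/opt/dw-etl/etl-script/ssa-hive2mysql/ssa-hive2mysql.sh $yesterday
run_step sh /home/hs/opt/dw-etl/sor-rds.sh $ytd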


3. SSA-level master control
ssa-all.sh
#!/bin/bash
#export yesterday=`date -d last-day +%Y%m%d`
export yesterday=$1

#src2ssa data proc ......
for src_db in /home/hs/opt/dw-etl/etl-script/ssa-increment/db99* /home/hs/opt/dw-etl/etl-script/ssa-total/db99*;
do
  echo `date "+%Y-%m-%d %H-%M-%S"`,start exec $src_db shell script... 
  for src2ssa_script in $src_db/*.sh;
  do
   sh $src2ssa_script $yesterday
  done
  echo `date "+%Y-%m-%d %H-%M-%S"`,$src_db shell script exec finished!
done
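
Because ssa-all.sh takes the batch date (yyyymmdd) as its only argument, a single day can also be re-run by hand, for example (the date value is just an illustration):

sh /home/hs/opt/dw-etl/ssa-all.sh 20160320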


4. Loading SSA data from the Hive warehouse into RDS
ssa-hive2mysql.sh
#!/bin/bash
export yesterday=${1:-`date -d last-day +%Y-%m-%d`}   #use the date passed in by dw_batch.sh, falling back to yesterday


#99_youhuiquan table current all data increment download from hive and upload to ssa rds...
/home/hs/opt/hive-1.2.1/bin/hive -e "use ssa;insert overwrite local directory '/home/hs/opt/dw-etl/etl-script/ssa-hive2mysql/data-ssa/99_youhuiquan' row format delimited fields terminated by '|' select * from 99_youhuiquan where substr(update_time,1,10)='$yesterday';"
/usr/local/bin/mysql -hiparrdess.mysql.rds.aliyuncs.com -udatauser -piloveyou -e "use tmp;drop table if exists 99_youhuiquan_today;create table 99_youhuiquan_today as select * from ssa.99_youhuiquan where 1=2;"
#loop load dir file to rds 
for tabdt_path in /home/hs/opt/dw-etl/etl-script/ssa-hive2mysql/data-ssa/99_youhuiquan/*;
do
/usr/local/bin/mysql -hiparrdess.mysql.rds.aliyuncs.com -udatauser -piloveyou -e "use tmp;load data local infile '$tabdt_path' into table 99_youhuiquan_today fields terminated by '|' enclosed by '' lines terminated by '\n' ignore 0 lines;"
done
/usr/local/bin/mysql -hiparrdess.mysql.rds.aliyuncs.com -udatauser -piloveyou -e "use tmp;alter table tmp.99_youhuiquan_today add index idx_99_youhuiquan_item_id (item_id) using btree;delete ca.* from ssa.99_youhuiquan ca left join tmp.99_youhuiquan_today i on ca.item_id = i.item_id where i.item_id is not null;insert into ssa.99_youhuiquan select * from tmp.99_youhuiquan_today;"

#99_dzcaigou table current all data increment download from hive and upload to ssa rds...
/home/hs/opt/hive-1.2.1/bin/hive -e "use ssa;insert overwrite local directory '/home/hs/opt/dw-etl/etl-script/ssa-hive2mysql/data-ssa/99_dzcaigou' row format delimited fields terminated by '|' select * from 99_dzcaigou where substr(update_time,1,10)='$yesterday';"
/usr/local/bin/mysql -hiparrdess.mysql.rds.aliyuncs.com -udatauser -piloveyou -e "use tmp;drop table if exists 99_dzcaigou_today;create table 99_dzcaigou_today as select * from ssa.99_dzcaigou where 1=2;"
#loop load dir file to rds 
for tabdt_path in /home/hs/opt/dw-etl/etl-script/ssa-hive2mysql/data-ssa/99_dzcaigou/*;
do
/usr/local/bin/mysql -hiparrdess.mysql.rds.aliyuncs.com -udatauser -piloveyou -e "use tmp;load data local infile '$tabdt_path' into table 99_dzcaigou_today fields terminated by '|' enclosed by '' lines terminated by '\n' ignore 0 lines;"
done
/usr/local/bin/mysql -hiparrdess.mysql.rds.aliyuncs.com -udatauser -piloveyou -e "use tmp;alter table tmp.99_dzcaigou_today add index idx_99_dzcaigou_item_id (item_id) using btree;delete ca.* from ssa.99_dzcaigou ca left join tmp.99_dzcaigou_today i on ca.item_id = i.item_id where i.item_id is not null;insert into ssa.99_dzcaigou select * from tmp.99_dzcaigou_today;"
.....................
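
Every table in ssa-hive2mysql.sh repeats the same steps: export yesterday's changed rows from Hive to a local directory, recreate a staging table in tmp, load every exported file, index the staging table, then delete the matching old rows and insert the new ones into the ssa schema on RDS. A parameterized sketch of that block follows; the sync_ca_table function and its key-column argument are only illustrative (the real script spells each table out, with blocks presumably generated by ssa-create_hive2mysql_script.sh under script_generate):

#!/bin/bash
export yesterday=$1
HIVE=/home/hs/opt/hive-1.2.1/bin/hive
MYSQL="/usr/local/bin/mysql -hiparrdess.mysql.rds.aliyuncs.com -udatauser -piloveyou"
DATA_DIR=/home/hs/opt/dw-etl/etl-script/ssa-hive2mysql/data-ssa

#sync one table's changed rows from hive ssa into rds ssa: export, stage, merge
sync_ca_table() {
  local tab=$1
  local key=$2
  #1. export yesterday's changed rows from hive to a local directory
  $HIVE -e "use ssa;insert overwrite local directory '$DATA_DIR/$tab' row format delimited fields terminated by '|' select * from $tab where substr(update_time,1,10)='$yesterday';"
  #2. recreate an empty staging table in tmp
  $MYSQL -e "use tmp;drop table if exists ${tab}_today;create table ${tab}_today as select * from ssa.$tab where 1=2;"
  #3. load every exported data file into the staging table
  for tabdt_path in $DATA_DIR/$tab/*;
  do
    $MYSQL -e "use tmp;load data local infile '$tabdt_path' into table ${tab}_today fields terminated by '|' enclosed by '' lines terminated by '\n' ignore 0 lines;"
  done
  #4. index the staging table, delete the matching old rows in ssa, then insert the fresh versions
  $MYSQL -e "use tmp;alter table tmp.${tab}_today add index idx_${tab}_${key} ($key) using btree;delete ca.* from ssa.$tab ca left join tmp.${tab}_today i on ca.$key = i.$key where i.$key is not null;insert into ssa.$tab select * from tmp.${tab}_today;"
}

sync_ca_table 99_youhuiquan item_id
sync_ca_table 99_dzcaigou item_id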


5. SOR-level master control in the Hive warehouse
sor-all.sh
#!/bin/bash
export yesterday=`date -d last-day +%Y-%m-%d`

#cd /home/hs/opt/dw-etl/etl-script/sor/
for sor_script in /home/hs/opt/dw-etl/etl-script/sor/*.sql; 
do
hive -hiveconf yesterday=$yesterday -f  $sor_script;
done
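
Inside each SOR .sql file, the date handed over via -hiveconf is available through Hive variable substitution as ${hiveconf:yesterday}, typically in a where clause such as substr(update_time,1,10)='${hiveconf:yesterday}'. A quick way to see the substitution at work (the query itself is only an illustration):

hive -hiveconf yesterday=2016-03-20 -e 'select "${hiveconf:yesterday}";'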


6. SOR-to-RDS master control
sor-rds.sh
#!/bin/bash
export yesterday=${1:-`date -d last-day +%Y%m%d`}   #use the date passed in by dw_batch.sh, falling back to yesterday

python /home/hs/opt/dw-etl/etl-script/sor-pyproc/hezi_item.py
/usr/local/bin/mysql -hrdsiparrdess.mysql.rds.aliyuncs.com -udatauser -piloveyou -e "use sor;call procedure_batch_call($yesterday); "
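
For an ad-hoc backfill, the batch procedure called by sor-rds.sh can also be invoked directly with an explicit date, for example (the date value is just an illustration):

/usr/local/bin/mysql -hrdsiparrdess.mysql.rds.aliyuncs.com -udatauser -piloveyou -e "use sor;call procedure_batch_call(20160320);"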


7. The stored procedure in RDS that batch-calls the SOR load procedures
CREATE DEFINER=`datauser`@`%` PROCEDURE `procedure_batch_call`(`p_etldate` int)
BEGIN

  #start calling the per-table procedures
	call p_tmd_organization_info(p_etldate);
	call p_tsf_account_info(p_etldate);
	call p_tsf_auth_contacts_info(p_etldate);
	..........

  #note: a stored PROCEDURE cannot RETURN a value, so the completion message below stays commented out
	#RETURN 'procedure call finished!';

END
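
The call list above is only a fragment; the full set of per-table procedures can be listed from information_schema when checking a batch (assuming they all live in the sor schema, as the unqualified calls suggest), for example:

/usr/local/bin/mysql -hrdsiparrdess.mysql.rds.aliyuncs.com -udatauser -piloveyou -e "select routine_name from information_schema.routines where routine_schema='sor' and routine_type='PROCEDURE' and routine_name like 'p\_%';"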


8. Intermediate Python script for processing the JSON data
hezi_item.py
# -*- coding:utf-8 -*-
# rebuild b_dingdan_item from b_dingdan.detail_json every day
import sys
import MySQLdb
import json
import warnings
warnings.filterwarnings("ignore")

db_config = {
    'host': 'rdsiparrdess.mysql.rds.aliyuncs.com',
    'user': 'datauser',
    'passwd': 'iloveyou',
    'port': 3306,
    'db': 'ssa'
}

def getDB():
    try:
        conn = MySQLdb.connect(host=db_config['host'],user=db_config['user'],passwd=db_config['passwd'],port=db_config['port'])
        conn.autocommit(True)
        curr = conn.cursor()
        curr.execute("SET NAMES utf8")
        curr.execute("USE %s" % db_config['db'])
    
        return conn, curr
    except MySQLdb.Error,e:
        print "Mysql Error %d: %s" % (e.args[0], e.args[1])
        return None, None

conn, curr = getDB()
if conn is None or curr is None:
    sys.exit("could not connect to MySQL, aborting")

i = 0

'''
sql = "SELECT max(order_id) FROM b_dingdan_item"
curr.execute(sql)
max_order_ids = curr.fetchall()
for item in max_order_ids:
    max_order_id = item[0]

sql = "SELECT order_id,detail_json FROM b_dingdan where order_id > %d" % (max_order_id)

'''

sql = "truncate table b_dingdan_item;"
curr.execute(sql)

sql = "SELECT order_id,detail_json FROM b_dingdan " 

curr.execute(sql)
foodbox_item_list = curr.fetchall()
for foodbox_item in foodbox_item_list:
    order_id = foodbox_item[0]
    foodbox_item1 = foodbox_item[1]
    items = json.loads(foodbox_item1)
    for x in items:
        rid = x['rid']
        quantity = x['quantity']
        price = x['price']
        amount = x['amount']
        sql = "insert into b_dingdan_item(order_id,rid,quantity,price,amount) values (%d,%d,%d,%.2f,%.2f)" % (order_id,rid,quantity,price,amount)
        print sql
        curr.execute(sql)
        i += 1

curr.close()
conn.close()

print 'done'
print 'total rows:' + str(i)
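
From the parsing loop above, every b_dingdan.detail_json value is expected to be a JSON array of objects carrying rid, quantity, price and amount, for example (the values are made up for illustration):

[{"rid": 1001, "quantity": 2, "price": 9.5, "amount": 19.0}, {"rid": 1002, "quantity": 1, "price": 3.0, "amount": 3.0}]

Each array element becomes one row of b_dingdan_item together with the parent order_id.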


9. SSA-layer incremental data initialization scripts
ssa-increment_data_int_except3tab.sh
#!/bin/bash
export yesterday=`date -d last-day +%Y%m%d`

#src2ssa data increment load int proc ......
for src_db in /home/hs/opt/dw-etl/etl-script/ssa-increment-int/db99*;
do
  echo `date "+%Y-%m-%d %H-%M-%S"`,start exec $src_db shell script... 
  for src2ssa_script in $src_db/*.sh;
  do
   if [ $src2ssa_script = "/home/hs/opt/dw-etl/etl-script/ssa-increment-int/db99store/src2ssa_99_dianzhangitem.sh" ] \
      || [ $src2ssa_script = "/home/hs/opt/dw-etl/etl-script/ssa-increment-int/db99store/src2ssa_99_dingdan.sh" ] \
      || [ $src2ssa_script = "/home/hs/opt/dw-etl/etl-script/ssa-increment-int/db99store/src2ssa_99_dingdanfood.sh" ];
   then
     echo "$src2ssa_script: table too large, skipped here (loaded by its dedicated init script)"
   else
     #echo $src2ssa_script
     sh $src2ssa_script $yesterday
   fi 
  done
  echo `date "+%Y-%m-%d %H-%M-%S"`,$src_db shell script exec finished!
done
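
The three exclusions above could also be written more compactly by matching on the script's basename; a minimal alternative sketch (equivalent only if those three file names are unique across the source-db directories, which is an assumption):

for src2ssa_script in $src_db/*.sh;
do
  case `basename $src2ssa_script` in
    src2ssa_99_dianzhangitem.sh|src2ssa_99_dingdan.sh|src2ssa_99_dingdanfood.sh)
      echo "$src2ssa_script: table too large, skipped here (loaded by its dedicated init script)" ;;
    *)
      sh $src2ssa_script $yesterday ;;
  esac
done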


The other three tables (handled separately in 10 or 100 passes split on the last one or two digits of the primary-key column, mainly because their data volume is too large to pull in one go):
init-99dianzhangitem.sh
#!/bin/bash
export yesterday=`date -d last-day +%Y%m%d`
#echo -n "please enter a day for running :"
#read yesterday

#99_dingdanitem data proc...
mysql -hrdsipaddress -udbreader -piloveyou -N -e"select CONCAT(ifnull(item_id,''),'|',ifnull(dingdan_id,''),'|',ifnull(rid,''),'|',ifnull(status,''),'|',ifnull(update_time,'')) from db99huanx.99_dingdanitem where DATE_FORMAT(update_time,'%Y%m%d')<=$yesterday and substr(rid,-1,1)=0;" > /home/hs/opt/dw-etl/data/db99huanx.99_dingdanitem_$yesterday.dat

mysql -hrdsipaddress -udbreader -piloveyou -N -e"select CONCAT(ifnull(item_id,''),'|',ifnull(dingdan_id,''),'|',ifnull(rid,''),'|',ifnull(status,''),'|',ifnull(update_time,'')) from db99huanx.99_dingdanitem where DATE_FORMAT(update_time,'%Y%m%d')<=$yesterday and substr(rid,-1,1)=1;" >> /home/hs/opt/dw-etl/data/db99huanx.99_dingdanitem_$yesterday.dat

mysql -hrdsipaddress -udbreader -piloveyou -N -e"select CONCAT(ifnull(item_id,''),'|',ifnull(dingdan_id,''),'|',ifnull(rid,''),'|',ifnull(status,''),'|',ifnull(update_time,'')) from db99huanx.99_dingdanitem where DATE_FORMAT(update_time,'%Y%m%d')<=$yesterday and substr(rid,-1,1)=2;" >> /home/hs/opt/dw-etl/data/db99huanx.99_dingdanitem_$yesterday.dat

mysql -hrdsipaddress -udbreader -piloveyou -N -e"select CONCAT(ifnull(item_id,''),'|',ifnull(dingdan_id,''),'|',ifnull(rid,''),'|',ifnull(status,''),'|',ifnull(update_time,'')) from db99huanx.99_dingdanitem where DATE_FORMAT(update_time,'%Y%m%d')<=$yesterday and substr(rid,-1,1)=3;" >> /home/hs/opt/dw-etl/data/db99huanx.99_dingdanitem_$yesterday.dat

mysql -hrdsipaddress -udbreader -piloveyou -N -e"select CONCAT(ifnull(item_id,''),'|',ifnull(dingdan_id,''),'|',ifnull(rid,''),'|',ifnull(status,''),'|',ifnull(update_time,'')) from db99huanx.99_dingdanitem where DATE_FORMAT(update_time,'%Y%m%d')<=$yesterday and substr(rid,-1,1)=4;" >> /home/hs/opt/dw-etl/data/db99huanx.99_dingdanitem_$yesterday.dat

mysql -hrdsipaddress -udbreader -piloveyou -N -e"select CONCAT(ifnull(item_id,''),'|',ifnull(dingdan_id,''),'|',ifnull(rid,''),'|',ifnull(status,''),'|',ifnull(update_time,'')) from db99huanx.99_dingdanitem where DATE_FORMAT(update_time,'%Y%m%d')<=$yesterday and substr(rid,-1,1)=5;" >> /home/hs/opt/dw-etl/data/db99huanx.99_dingdanitem_$yesterday.dat

mysql -hrdsipaddress -udbreader -piloveyou -N -e"select CONCAT(ifnull(item_id,''),'|',ifnull(dingdan_id,''),'|',ifnull(rid,''),'|',ifnull(status,''),'|',ifnull(update_time,'')) from db99huanx.99_dingdanitem where DATE_FORMAT(update_time,'%Y%m%d')<=$yesterday and substr(rid,-1,1)=6;" >> /home/hs/opt/dw-etl/data/db99huanx.99_dingdanitem_$yesterday.dat

mysql -hrdsipaddress -udbreader -piloveyou -N -e"select CONCAT(ifnull(item_id,''),'|',ifnull(dingdan_id,''),'|',ifnull(rid,''),'|',ifnull(status,''),'|',ifnull(update_time,'')) from db99huanx.99_dingdanitem where DATE_FORMAT(update_time,'%Y%m%d')<=$yesterday and substr(rid,-1,1)=7;" >> /home/hs/opt/dw-etl/data/db99huanx.99_dingdanitem_$yesterday.dat

mysql -hrdsipaddress -udbreader -piloveyou -N -e"select CONCAT(ifnull(item_id,''),'|',ifnull(dingdan_id,''),'|',ifnull(rid,''),'|',ifnull(status,''),'|',ifnull(update_time,'')) from db99huanx.99_dingdanitem where DATE_FORMAT(update_time,'%Y%m%d')<=$yesterday and substr(rid,-1,1)=8;" >> /home/hs/opt/dw-etl/data/db99huanx.99_dingdanitem_$yesterday.dat

mysql -hrdsipaddress -udbreader -piloveyou -N -e"select CONCAT(ifnull(item_id,''),'|',ifnull(dingdan_id,''),'|',ifnull(rid,''),'|',ifnull(status,''),'|',ifnull(update_time,'')) from db99huanx.99_dingdanitem where DATE_FORMAT(update_time,'%Y%m%d')<=$yesterday and substr(rid,-1,1)=9;" >> /home/hs/opt/dw-etl/data/db99huanx.99_dingdanitem_$yesterday.dat

hive -e "use ssa;truncate table 99_dingdanitem;load data local inpath '/home/hs/opt/dw-etl/data/db99huanx.99_dingdanitem_$yesterday.dat' into table 99_dingdanitem;"

#99_dingdanitem table current all data download from hive and upload to ssa rds...
/home/hs/opt/hive-1.2.1/bin/hive -e "use ssa;insert overwrite local directory '/home/hs/opt/dw-etl/etl-script/ssa-hive2mysql/data-ssa/99_dingdanitem' row format delimited fields terminated by '|' select * from 99_dingdanitem;"
/usr/local/bin/mysql -hrdsipaddress.mysql.rds.aliyuncs.com -udatauser -piloveyou -e "use ssa;truncate table 99_dingdanitem;"
#loop load dir file to rds 
for tabdt_path in /home/hs/opt/dw-etl/etl-script/ssa-hive2mysql/data-ssa/99_dingdanitem/*;
do
/usr/local/bin/mysql -hrdsipaddress.mysql.rds.aliyuncs.com -udatauser -piloveyou -e "use ssa;load data local infile '$tabdt_path' into table 99_dingdanitem fields terminated by '|' enclosed by '' lines terminated by '\n' ignore 0 lines;"
done

.........
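
The ten per-digit extracts in init-99dianzhangitem.sh could equally be driven by a loop; a minimal sketch, using the same placeholder host and credentials and appending to the same .dat file:

> /home/hs/opt/dw-etl/data/db99huanx.99_dingdanitem_$yesterday.dat
for d in 0 1 2 3 4 5 6 7 8 9;
do
  mysql -hrdsipaddress -udbreader -piloveyou -N -e"select CONCAT(ifnull(item_id,''),'|',ifnull(dingdan_id,''),'|',ifnull(rid,''),'|',ifnull(status,''),'|',ifnull(update_time,'')) from db99huanx.99_dingdanitem where DATE_FORMAT(update_time,'%Y%m%d')<=$yesterday and substr(rid,-1,1)=$d;" >> /home/hs/opt/dw-etl/data/db99huanx.99_dingdanitem_$yesterday.dat
done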

10. Other implementation scripts
The remaining table-specific transformation and cleansing scripts are generated automatically by the shell scripts under script_generate (see the previous post: http://blog.csdn.net/babyfish13/article/details/50971817) and are not covered further here.
