sqoop + hive + shell + oozie example



create.q

create external table IF NOT EXISTS default.SYS_ACCOUNT
(
  account_id           double,
  account_name         string,
  account_password     string,
  status               int,
  last_login_device_id string,
  band_device_id       string,
  key                  string,
  last_login_ip        string,
  account_type         int,
  lock_time            DATE,
  account_password_ver string
)
partitioned by (date_time string) 
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001'
location '/data/hive/sys_account';
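
The DDL can be checked outside Oozie by running it against the same HiveServer2 endpoint the workflow uses (this assumes beeline is available on the client machine):

beeline -u jdbc:hive2://fuze248:10000/default -f create.q
beeline -u jdbc:hive2://fuze248:10000/default -e "SHOW PARTITIONS default.sys_account;"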

job.properties


nameNode=hdfs://fuze250:8020
jobTracker=fuze250:8032
#nameNode=hdfs://nameservice
#jobTracker=yarnRM
queueName=default
oozieRoot=oozie
hiveRoot=hive

oozie.use.system.libpath=true
oozie.libpath=/user/oozie/share/lib
oozie.wf.application.path=${nameNode}/user/${user.name}/${oozieRoot}/sqoop/sys_account_user_vip
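
With these properties, deploying and submitting comes down to uploading the workflow files to the application path and calling the Oozie CLI. The paths below assume the job runs as root, so ${user.name} resolves to root; the Oozie server URL (default port 11000 on fuze250) is an assumption, adjust it to your cluster:

hdfs dfs -mkdir -p /user/root/oozie/sqoop/sys_account_user_vip
hdfs dfs -put -f workflow.xml create.q procedure.q script.sh /user/root/oozie/sqoop/sys_account_user_vip/
oozie job -oozie http://fuze250:11000/oozie -config job.properties -run
oozie job -oozie http://fuze250:11000/oozie -info <job-id>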

procedure.q


ALTER TABLE default.SYS_ACCOUNT ADD PARTITION (date_time='${date_time}') LOCATION '/data/hive/sys_account/date_time=${date_time}';
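
In the workflow, the final hive2 action supplies date_time through a param element, which Oozie hands to beeline as --hivevar. To run the same script by hand, pass the variable directly (the date below is only an example value):

beeline -u jdbc:hive2://fuze248:10000/default --hivevar date_time=20190101 -f procedure.q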

script.sh

#!/bin/bash
# Print today's date as key=value on stdout; the shell action's <capture-output/>
# exposes it to later actions as wf:actionData('shell-date')['date_time'].
DATE=$(date +%Y%m%d)
echo date_time=$DATE

workflow.xml

Hand-written version. The element tags, transitions, and the create.q/procedure.q script references are reconstructed here from the values that survived; only the shell action name shell-date is taken from the original EL expressions.

<workflow-app xmlns="uri:oozie:workflow:0.5" name="sys_account_import">

	<start to="hive2-create"/>

	<!-- Create the external table if it does not exist yet (create.q). -->
	<action name="hive2-create">
		<hive2 xmlns="uri:oozie:hive2-action:0.1">
			<job-tracker>${jobTracker}</job-tracker>
			<name-node>${nameNode}</name-node>
			<configuration>
				<property>
					<name>mapred.job.queue.name</name>
					<value>${queueName}</value>
				</property>
			</configuration>
			<jdbc-url>jdbc:hive2://fuze248:10000/default</jdbc-url>
			<script>create.q</script>
		</hive2>
		<ok to="shell-date"/>
		<error to="fail"/>
	</action>

	<!-- Run script.sh; <capture-output/> makes the echoed date_time=YYYYMMDD
	     line available as wf:actionData('shell-date'). -->
	<action name="shell-date">
		<shell xmlns="uri:oozie:shell-action:0.2">
			<job-tracker>${jobTracker}</job-tracker>
			<name-node>${nameNode}</name-node>
			<configuration>
				<property>
					<name>mapred.job.queue.name</name>
					<value>${queueName}</value>
				</property>
			</configuration>
			<exec>script.sh</exec>
			<file>script.sh</file>
			<capture-output/>
		</shell>
		<ok to="sqoop-import"/>
		<error to="fail"/>
	</action>

	<!-- Import SYS_ACCOUNT from Oracle into the dated partition directory. -->
	<action name="sqoop-import">
		<sqoop xmlns="uri:oozie:sqoop-action:0.2">
			<job-tracker>${jobTracker}</job-tracker>
			<name-node>${nameNode}</name-node>
			<configuration>
				<property>
					<name>mapred.job.queue.name</name>
					<value>${queueName}</value>
				</property>
			</configuration>
			<arg>import</arg>
			<arg>--connect</arg>
			<arg>jdbc:oracle:thin:@xxx:1521:xxx</arg>
			<arg>--username</arg>
			<arg>FUZEPASSPORT</arg>
			<arg>--password</arg>
			<arg>FUZEPASSPORT_PWD</arg>
			<arg>--query</arg>
			<arg>select SA.* from SYS_ACCOUNT SA where $CONDITIONS</arg>
			<arg>--delete-target-dir</arg>
			<arg>--target-dir</arg>
			<arg>/data/hive/sys_account/date_time=${(wf:actionData('shell-date')['date_time'])}</arg>
			<arg>--fields-terminated-by</arg>
			<arg>\001</arg>
			<arg>-m</arg>
			<arg>1</arg>
		</sqoop>
		<ok to="hive2-add-partition"/>
		<error to="fail"/>
	</action>

	<!-- Register the new partition (procedure.q), passing the captured date as a parameter. -->
	<action name="hive2-add-partition">
		<hive2 xmlns="uri:oozie:hive2-action:0.1">
			<job-tracker>${jobTracker}</job-tracker>
			<name-node>${nameNode}</name-node>
			<configuration>
				<property>
					<name>mapred.job.queue.name</name>
					<value>${queueName}</value>
				</property>
			</configuration>
			<jdbc-url>jdbc:hive2://fuze248:10000/default</jdbc-url>
			<script>procedure.q</script>
			<param>date_time=${(wf:actionData('shell-date')['date_time'])}</param>
		</hive2>
		<ok to="end"/>
		<error to="fail"/>
	</action>

	<kill name="fail">
		<message>Sqoop failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
	</kill>

	<end name="end"/>

</workflow-app>


workflowhue.xml

The version generated through Hue, reconstructed the same way; only the shell action name shell-a94d is taken from the original EL expressions, the other action names and script references are inferred.

<workflow-app xmlns="uri:oozie:workflow:0.5" name="sys_account_user_vip">
    <start to="hive2-create"/>
    <kill name="Kill">
        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>
    <action name="hive2-create">
        <hive2 xmlns="uri:oozie:hive2-action:0.1">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <jdbc-url>jdbc:hive2://fuze248:10000/default</jdbc-url>
            <script>create.q</script>
        </hive2>
        <ok to="shell-a94d"/>
        <error to="Kill"/>
    </action>
    <action name="shell-a94d">
        <shell xmlns="uri:oozie:shell-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <exec>/user/root/oozie/sqoop/sys_account_user_vip/script.sh</exec>
            <file>/user/root/oozie/sqoop/sys_account_user_vip/script.sh#script.sh</file>
            <capture-output/>
        </shell>
        <ok to="sqoop-import"/>
        <error to="Kill"/>
    </action>
    <action name="sqoop-import">
        <sqoop xmlns="uri:oozie:sqoop-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <arg>import</arg>
            <arg>--connect</arg>
            <arg>jdbc:oracle:thin:@xxx:1521:xxx</arg>
            <arg>--username</arg>
            <arg>FUZEPASSPORT</arg>
            <arg>--password</arg>
            <arg>FUZEPASSPORT_PWD</arg>
            <arg>--query</arg>
            <arg>select SA.* from SYS_ACCOUNT SA where $CONDITIONS</arg>
            <arg>--delete-target-dir</arg>
            <arg>--target-dir</arg>
            <arg>/data/hive/sys_account/date_time=${(wf:actionData('shell-a94d')['date_time'])}</arg>
            <arg>--fields-terminated-by</arg>
            <arg>\001</arg>
            <arg>-m</arg>
            <arg>1</arg>
        </sqoop>
        <ok to="hive2-add-partition"/>
        <error to="Kill"/>
    </action>
    <action name="hive2-add-partition">
        <hive2 xmlns="uri:oozie:hive2-action:0.1">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <jdbc-url>jdbc:hive2://fuze248:10000/default</jdbc-url>
            <script>procedure.q</script>
            <param>date_time=${(wf:actionData('shell-a94d')['date_time'])}</param>
        </hive2>
        <ok to="End"/>
        <error to="Kill"/>
    </action>
    <end name="End"/>
</workflow-app>


A shell action that runs other commands (here: sqoop eval plus spark-submit from one script). Action names and transitions in this reconstruction are likewise inferred.

<workflow-app xmlns="uri:oozie:workflow:0.5" name="shell-test-wf">
    <start to="shell-test"/>
    <kill name="Kill">
        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>
    <action name="shell-test">
        <shell xmlns="uri:oozie:shell-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <exec>sh</exec>
            <argument>test.sh</argument>
            <file>/tmp/shell/test.sh#test.sh</file>
        </shell>
        <ok to="End"/>
        <error to="Kill"/>
    </action>
    <end name="End"/>
</workflow-app>

test.sh

#!/bin/bash
# Truncate the MySQL tb_test table, then launch the Spark job on YARN.
sqoop eval --connect jdbc:mysql://xxx:3306/test --username root --password xxx -e "TRUNCATE TABLE tb_test"
spark-submit --class com.userportrait.OozieTest --master yarn-cluster --num-executors 1 --executor-cores 2 --executor-memory 512M --driver-memory 512M sparktest-1.0-SNAPSHOT.jar
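
test.sh references sparktest-1.0-SNAPSHOT.jar relative to the YARN container's working directory, so the jar has to be localized next to the script; one way (an assumption, not shown in the original workflow) is to keep both in /tmp/shell on HDFS and list the jar as an additional file entry in the shell action:

hdfs dfs -mkdir -p /tmp/shell
hdfs dfs -put -f test.sh sparktest-1.0-SNAPSHOT.jar /tmp/shell/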

