一、安装hadoop客户端
- 在master节点,上传hadoop安装包到/opt/greenplum目录,hadoop客户端的版本根据hadoop集群版本而定,执行如下命令进行解压:
tar -xvzf hadoop-2.6.0-cdh5.7.6.tar.gz
- 修改/opt/greenplum/hadoop-2.6.0-cdh5.7.6/etc/hadoop目录下的相关配置文件,使该客户端能够正常访问hdfs集群即可
- 创建软链接
cd /opt/greenplum
ln -s hadoop-2.6.0-cdh5.7.6 hadoop
- 配置环境变量,编辑~/.bash_profile文件,添加如下内容:
export HADOOP_HOME=/opt/greenplum/hadoop
export PATH=$HADOOP_HOME/bin:$PATH
- 执行如下命令使环境变量生效,并验证hadoop客户端能否访问hdfs集群:
source ~/.bash_profile
hdfs dfs -ls /
二、配置hadoop客户端到集群其他服务器
- 在master节点上将配置好的hadoop包进行打包
tar -cvzf hadoop.tar.gz hadoop-2.6.0-cdh5.7.6
- 分发到集群的其他节点,other_hosts为除了master节点外的其他服务器主机列表。
gpscp -f /opt/greenplum/other_hosts hadoop.tar.gz =:/opt/greenplum/
gpssh -f /opt/greenplum/other_hosts
=> cd /opt/greenplum
[kylin-203-122]
[kylin-203-117]
[kylin-203-130]
=> tar -xvzf hadoop.tar.gz
...
=> ln -s hadoop-2.6.0-cdh5.7.6 hadoop
[kylin-203-122]
[kylin-203-117]
[kylin-203-130]
=> exit
- 将master节点的~/.bash_profile分发到其他节点,使各节点的hadoop环境变量与master保持一致
gpscp -f /opt/greenplum/other_hosts ~/.bash_profile =:~/.bash_profile
三、修改gp配置
gpconfig -c gp_hadoop_target_version -v "cdh"
gpconfig -c gp_hadoop_home -v "/opt/greenplum/hadoop"
#执行该命令使配置生效
gpstop -u
#执行如下命令查看配置是否生效
gpconfig --show gp_hadoop_target_version
gpconfig --show gp_hadoop_home
四、导入数据到gp中
-- Target table for the import: append-only, column-oriented storage
-- compressed with zlib at level 5, rows distributed randomly,
-- range-partitioned on part_dt.
CREATE TABLE public.kylin_sales_dyprt
(
    trans_id         BIGINT,
    part_dt          DATE,
    lstg_format_name VARCHAR,
    leaf_categ_id    BIGINT,
    lstg_site_id     INT,
    slr_segment_cd   SMALLINT,
    price            DECIMAL(19, 4),
    item_count       BIGINT,
    seller_id        BIGINT,
    buyer_country    VARCHAR,
    seller_country   VARCHAR,
    user_id          VARCHAR,
    region           VARCHAR
)
WITH (appendonly = true, orientation = column, compresstype = zlib, compresslevel = 5)
DISTRIBUTED RANDOMLY
PARTITION BY RANGE (part_dt)
(
    -- One daily partition holding part_dt = 2012-01-01; the END bound is
    -- exclusive (the default, spelled out here for readability).
    PARTITION p_20120101
        START ('2012-01-01'::DATE) INCLUSIVE
        END ('2012-01-02'::DATE) EXCLUSIVE
);
-- External table that reads the files under the Hive warehouse directory
-- year=2012/month=1/day=1 through the gphdfs protocol.
-- The column list mirrors public.kylin_sales_dyprt, in the same order.
CREATE EXTERNAL TABLE public.hdfs_kylin_sales_dyprt
(
    trans_id         BIGINT,
    part_dt          DATE,
    lstg_format_name VARCHAR,
    leaf_categ_id    BIGINT,
    lstg_site_id     INT,
    slr_segment_cd   SMALLINT,
    price            DECIMAL(19, 4),
    item_count       BIGINT,
    seller_id        BIGINT,
    buyer_country    VARCHAR,
    seller_country   VARCHAR,
    user_id          VARCHAR,
    region           VARCHAR
)
LOCATION ('gphdfs://bigdatacluster:8888/user/hive/warehouse/test.db/kylin_sales_dyprt/year=2012/month=1/day=1')
-- Comma-delimited text; the NULL marker is given as '\N' and escape
-- processing is switched off.
FORMAT 'text' (DELIMITER ',' NULL '\N' ESCAPE 'off')
ENCODING 'UTF8';
-- Load the rows read from HDFS (external table) into the partitioned target table.
-- Columns are listed explicitly on both sides so the load does not depend on
-- the two tables keeping the same column order, and both tables are
-- schema-qualified so the statement does not depend on the session search_path.
INSERT INTO public.kylin_sales_dyprt (
    trans_id,
    part_dt,
    lstg_format_name,
    leaf_categ_id,
    lstg_site_id,
    slr_segment_cd,
    price,
    item_count,
    seller_id,
    buyer_country,
    seller_country,
    user_id,
    region
)
SELECT
    trans_id,
    part_dt,
    lstg_format_name,
    leaf_categ_id,
    lstg_site_id,
    slr_segment_cd,
    price,
    item_count,
    seller_id,
    buyer_country,
    seller_country,
    user_id,
    region
FROM public.hdfs_kylin_sales_dyprt;