(1) Hadoop的HDFS里面准备测试数据
查看准备的全部数据文件,以及每个文件大小(共32个文件):
# Recursively list the prepared data files with human-readable sizes.
hadoop fs -ls -R -h /result_file/wide_file/20140318/wide_file_gb_2014031813.dat-*
-rw-r--r-- 3 bdi supergroup 32.3 M 2014-05-09 14:25 /result_file/wide_file/20140318/wide_file_gb_2014031813.dat-0
-rw-r--r-- 3 bdi supergroup 32.2 M 2014-05-09 14:25 /result_file/wide_file/20140318/wide_file_gb_2014031813.dat-1
-rw-r--r-- 3 bdi supergroup 32.1 M 2014-05-09 14:25 /result_file/wide_file/20140318/wide_file_gb_2014031813.dat-10
-rw-r--r-- 3 bdi supergroup 32.2 M 2014-05-09 14:25 /result_file/wide_file/20140318/wide_file_gb_2014031813.dat-11
-rw-r--r-- 3 bdi supergroup 32.1 M 2014-05-09 14:25 /result_file/wide_file/20140318/wide_file_gb_2014031813.dat-12
-rw-r--r-- 3 bdi supergroup 32.1 M 2014-05-09 14:25 /result_file/wide_file/20140318/wide_file_gb_2014031813.dat-13
-rw-r--r-- 3 bdi supergroup 32.2 M 2014-05-09 14:25 /result_file/wide_file/20140318/wide_file_gb_2014031813.dat-14
-rw-r--r-- 3 bdi supergroup 32.1 M 2014-05-09 14:25 /result_file/wide_file/20140318/wide_file_gb_2014031813.dat-15
-rw-r--r-- 3 bdi supergroup 32.1 M 2014-05-09 14:25 /result_file/wide_file/20140318/wide_file_gb_2014031813.dat-16
-rw-r--r-- 3 bdi supergroup 32.1 M 2014-05-09 14:25 /result_file/wide_file/20140318/wide_file_gb_2014031813.dat-17
-rw-r--r-- 3 bdi supergroup 32.1 M 2014-05-09 14:25 /result_file/wide_file/20140318/wide_file_gb_2014031813.dat-18
-rw-r--r-- 3 bdi supergroup 32.1 M 2014-05-09 14:25 /result_file/wide_file/20140318/wide_file_gb_2014031813.dat-19
-rw-r--r-- 3 bdi supergroup 32.2 M 2014-05-09 14:25 /result_file/wide_file/20140318/wide_file_gb_2014031813.dat-2
-rw-r--r-- 3 bdi supergroup 32.1 M 2014-05-09 14:25 /result_file/wide_file/20140318/wide_file_gb_2014031813.dat-20
-rw-r--r-- 3 bdi supergroup 32.1 M 2014-05-09 14:25 /result_file/wide_file/20140318/wide_file_gb_2014031813.dat-21
-rw-r--r-- 3 bdi supergroup 32.1 M 2014-05-09 14:25 /result_file/wide_file/20140318/wide_file_gb_2014031813.dat-22
-rw-r--r-- 3 bdi supergroup 32.0 M 2014-05-09 14:25 /result_file/wide_file/20140318/wide_file_gb_2014031813.dat-23
-rw-r--r-- 3 bdi supergroup 32.1 M 2014-05-09 14:25 /result_file/wide_file/20140318/wide_file_gb_2014031813.dat-24
-rw-r--r-- 3 bdi supergroup 32.0 M 2014-05-09 14:25 /result_file/wide_file/20140318/wide_file_gb_2014031813.dat-25
-rw-r--r-- 3 bdi supergroup 32.0 M 2014-05-09 14:25 /result_file/wide_file/20140318/wide_file_gb_2014031813.dat-26
-rw-r--r-- 3 bdi supergroup 32.0 M 2014-05-09 14:25 /result_file/wide_file/20140318/wide_file_gb_2014031813.dat-27
-rw-r--r-- 3 bdi supergroup 32.0 M 2014-05-09 14:25 /result_file/wide_file/20140318/wide_file_gb_2014031813.dat-28
-rw-r--r-- 3 bdi supergroup 31.9 M 2014-05-09 14:25 /result_file/wide_file/20140318/wide_file_gb_2014031813.dat-29
-rw-r--r-- 3 bdi supergroup 32.2 M 2014-05-09 14:25 /result_file/wide_file/20140318/wide_file_gb_2014031813.dat-3
-rw-r--r-- 3 bdi supergroup 31.9 M 2014-05-09 14:25 /result_file/wide_file/20140318/wide_file_gb_2014031813.dat-30
-rw-r--r-- 3 bdi supergroup 32.0 M 2014-05-09 14:25 /result_file/wide_file/20140318/wide_file_gb_2014031813.dat-31
-rw-r--r-- 3 bdi supergroup 32.2 M 2014-05-09 14:25 /result_file/wide_file/20140318/wide_file_gb_2014031813.dat-4
-rw-r--r-- 3 bdi supergroup 32.2 M 2014-05-09 14:25 /result_file/wide_file/20140318/wide_file_gb_2014031813.dat-5
-rw-r--r-- 3 bdi supergroup 32.2 M 2014-05-09 14:25 /result_file/wide_file/20140318/wide_file_gb_2014031813.dat-6
-rw-r--r-- 3 bdi supergroup 32.2 M 2014-05-09 14:25 /result_file/wide_file/20140318/wide_file_gb_2014031813.dat-7
-rw-r--r-- 3 bdi supergroup 32.1 M 2014-05-09 14:25 /result_file/wide_file/20140318/wide_file_gb_2014031813.dat-8
-rw-r--r-- 3 bdi supergroup 32.2 M 2014-05-09 14:25 /result_file/wide_file/20140318/wide_file_gb_2014031813.dat-9
查看文件内容总行数:
# Stream every matching data file and count the total number of lines.
hadoop fs -cat /result_file/wide_file/20140318/wide_file_gb_2014031813.dat-* | wc -l
4174417
(2) 连接GPDB数据库,创建外部表
-- Readable external table over the HDFS files
-- /result_file/wide_file/20140318/wide_file_gb_2014031813.dat-*,
-- accessed through the gphdfs protocol. Every field is read as text and
-- fields are separated by '|'.
CREATE EXTERNAL TABLE wide_table
(
    cell_name               text,
    pro_name                text,
    region_name             text,
    city_name               text,
    ue_imei                 text,
    ms_id                   text,
    lac_ci                  text,
    cell_type               text,
    group_major_level       text,
    group_second_level      text,
    group_third_level       text,
    start_time              text,
    size_dlall              text,
    size_ulall              text,
    size_ul2g               text,
    size_dl2g               text,
    size_ul3g               text,
    size_dl3g               text,
    serv_fail_cnt           text,
    serv_excep_cnt          text,
    session_ul_durations    text,
    session_dl_durations    text,
    usage_durations         text,
    dns_sel_suc_cnt         text,
    dns_sel_app_cnt         text,
    session_suc_cnt         text,
    session_app_cnt         text,
    trans_suc_fluxs         text,
    trans_fluxs             text,
    size_fluxall            text,
    attach_suc_cnt          text,
    attach_app_cnt          text,
    pdp_suc_cnt             text,
    pdp_app_cnt             text,
    rau_suc_cnt             text,
    rau_app_cnt             text,
    drop_line_cnt           text,
    attach_durations        text,
    pdp_durations           text,
    tcp_suc_cnt             text,
    tcp_app_cnt             text,
    ser_latency_low_cnt     text,
    session_dl_lowspeed_cnt text,
    sig_line_cnt            text,
    cell_id                 text,
    province_id             text,
    region_id               text,
    city_id                 text
)
LOCATION ('gphdfs://10.41.28.82:11220/result_file/wide_file/20140318/wide_file_gb_2014031813.dat-*')
FORMAT 'text' (DELIMITER '|')
ENCODING 'UTF8'
-- Single-row error isolation: malformed rows are logged to wide_table_error
-- rather than aborting the scan, up to 1000000 rejected rows per segment.
LOG ERRORS INTO wide_table_error SEGMENT REJECT LIMIT 1000000 ROWS;
(3) 查看外部表加载的文件内容行数
-- Count the rows readable through the external table.
SELECT COUNT(1)
FROM wide_table;
NOTICE: Found 4 data formatting errors (4 or more input rows). Rejected related input data.
count
---------
4174417 ----通过外部表可以查看到所有文件中的内容,没有丢弃数据
(1 row)
(1) 创建表
-- Fact table that receives the rows loaded through the external table
-- wide_table. It has 48 text columns, and rows are distributed across
-- segments by the (ue_imei, ms_id) key.
CREATE TABLE fact_wide_table
(
    cell_name               text,
    pro_name                text,
    region_name             text,
    city_name               text,
    ue_imei                 text,
    ms_id                   text,
    lac_ci                  text,
    cell_type               text,
    group_major_level       text,
    group_second_level      text,
    group_third_level       text,
    start_time              text,
    size_dlall              text,
    size_ulall              text,
    size_ul2g               text,
    size_dl2g               text,
    size_ul3g               text,
    size_dl3g               text,
    serv_fail_cnt           text,
    serv_excep_cnt          text,
    session_ul_durations    text,
    session_dl_durations    text,
    usage_durations         text,
    dns_sel_suc_cnt         text,
    dns_sel_app_cnt         text,
    session_suc_cnt         text,
    session_app_cnt         text,
    trans_suc_fluxs         text,
    trans_fluxs             text,
    size_fluxall            text,
    attach_suc_cnt          text,
    attach_app_cnt          text,
    pdp_suc_cnt             text,
    pdp_app_cnt             text,
    rau_suc_cnt             text,
    rau_app_cnt             text,
    drop_line_cnt           text,
    attach_durations        text,
    pdp_durations           text,
    tcp_suc_cnt             text,
    tcp_app_cnt             text,
    ser_latency_low_cnt     text,
    session_dl_lowspeed_cnt text,
    sig_line_cnt            text,
    cell_id                 text,
    province_id             text,
    region_id               text,
    city_id                 text
)
DISTRIBUTED BY (ue_imei, ms_id);
(2) 通过外部表,插入数据
-- Load every row exposed by the external table into the fact table.
noas=# INSERT INTO fact_wide_table SELECT * FROM wide_table;
NOTICE: Found 4 data formatting errors (4 or more input rows). Rejected related input data.
INSERT 0 4174417 ------数据全部加载到事实表
(3) 查询
可以查询事实表的前10行看数据是否存在:
-- Spot-check: fetch 10 rows from the fact table to confirm data is present.
SELECT *
FROM fact_wide_table
LIMIT 10;