1、建一个文本文件
[root@sandbox ~]# vi /customers
4000001,Kristina,Chung,55,Pilot
4000002,Paige,Chen,74,Teacher
4000003,Sherri,Melton,34,Firefighter
4000004,Gretchen,Hill,66,Computerhardware engineer
4000005,Karen,Puckett,74,Lawyer
4000006,Patrick,Song,42,Veterinarian
4000007,Elsie,Hamilton,43,Pilot
4000008,Hazel,Bender,63,Carpenter
4000009,Malcolm,Wagner,39,Artist
4000010,Dolores,McLaughlin,60,Writer
4000011,Francis,McNamara,47,Therapist
4000012,Sandy,Raynor,26,Writer
4000013,Marion,Moon,41,Carpenter
4000014,Beth,Woodard,65,
4000015,Julia,Desai,49,Musician
4000016,Jerome,Wallace,52,Pharmacist
4000017,Neal,Lawrence,72,Computersupport specialist
4000018,Jean,Griffin,45,Childcareworker
4000019,Kristine,Dougherty,63,Financialanalyst
2、把数据文件读入pig关系
grunt> copyFromLocal /customers /user/pig/
grunt> raw_data = LOAD '/user/pig/customers' USING PigStorage(',') AS (
>> custno:chararray,
>> firstname:chararray,
>> lastname:chararray,
>> age:int,
>> profession:chararray
>> );
grunt>dump raw_data;
(4000001,Kristina,Chung,55,Pilot)
(4000002,Paige,Chen,74,Teacher)
(4000003,Sherri,Melton,34,Firefighter)
(4000004,Gretchen,Hill,66,Computerhardware engineer)
(4000005,Karen,Puckett,74,Lawyer)
(4000006,Patrick,Song,42,Veterinarian)
(4000007,Elsie,Hamilton,43,Pilot)
(4000008,Hazel,Bender,63,Carpenter)
(4000009,Malcolm,Wagner,39,Artist)
(4000010,Dolores,McLaughlin,60,Writer)
(4000011,Francis,McNamara,47,Therapist)
(4000012,Sandy,Raynor,26,Writer)
(4000013,Marion,Moon,41,Carpenter)
(4000014,Beth,Woodard,65,)
(4000015,Julia,Desai,49,Musician)
(4000016,Jerome,Wallace,52,Pharmacist)
(4000017,Neal,Lawrence,72,Computersupport specialist)
(4000018,Jean,Griffin,45,Childcareworker)
(4000019,Kristine,Dougherty,63,Financialanalyst)
3、 Pig把关系写到HBase
在HBase中建一个表
hbase(main):017:0> create 'customers', 'customer_data'
0 row(s) in 1.4870 seconds
=> Hbase::Table - customers
hbase(main):018:0> list
TABLE
customers
iemployee
student
t1
tc
5 row(s) in 0.0390 seconds
hbase(main):019:0>scan 'customers'
ROW COLUMN+CELL
0 row(s) in 0.3840 seconds
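注意：HBaseStorage 把关系的第一个字段（这里是 custno）用作行键，列清单只对应其余字段。下面的列清单多写了 customer_data:custno，所以写入后各列整体错位一位（见后面的 scan 结果：age 列里存的是 profession 的值）；正确的列清单应去掉 customer_data:custno。
grunt> STORE raw_data INTO 'hbase://customers' USING org.apache.pig.backend.hadoop.hbase.HBaseStorage(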
>> 'customer_data:custno
>> customer_data:firstname
>> customer_data:lastname
>> customer_data:age
>> customer_data:profession'
>> );
hbase(main):023:0>scan 'customers'
ROW COLUMN+CELL
4000001 column=customer_data:age, timestamp=1427249531913, value=Pilot
4000001 column=customer_data:custno, timestamp=1427249531913,value=Kristina
4000001 column=customer_data:firstname, timestamp=1427249531913,value=Chung
4000001 column=customer_data:lastname, timestamp=1427249531913, value=55
4000002 column=customer_data:age, timestamp=1427249531927, value=Teacher
4000002 column=customer_data:custno, timestamp=1427249531927, value=Paige
4000002 column=customer_data:firstname, timestamp=1427249531927, value=Chen
4000002 column=customer_data:lastname,
。
。
。
4000019 column=customer_data:age, timestamp=1427249531931, value=Financialanalyst
4000019 column=customer_data:custno, timestamp=1427249531931,value=Kristine
4000019 column=customer_data:firstname, timestamp=1427249531931,value=Dougherty
4000019 column=customer_data:lastname,timestamp=1427249531931, value=63
19 row(s) in 0.2800 seconds
4、Pig读HBase表
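注意：HBaseStorage 默认不返回行键，而且这里沿用了写入时的列清单：custno、firstname、lastname、age 四列里存的其实是 firstname、lastname、age、profession 的值，customer_data:profession 列在表中不存在，所以每个元组的最后一个字段为空。如需同时读出行键，可给 HBaseStorage 加第二个参数 '-loadKey true'。
grunt> raw = LOAD 'hbase://customers' USING org.apache.pig.backend.hadoop.hbase.HBaseStorage(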
>> 'customer_data:custno
>> customer_data:firstname
>> customer_data:lastname
>> customer_data:age
>> customer_data:profession'
>> );
grunt>dump raw;
(Kristina,Chung,55,Pilot,)
(Paige,Chen,74,Teacher,)
(Sherri,Melton,34,Firefighter,)
(Gretchen,Hill,66,Computerhardware engineer,)
(Karen,Puckett,74,Lawyer,)
(Patrick,Song,42,Veterinarian,)
(Elsie,Hamilton,43,Pilot,)
(Hazel,Bender,63,Carpenter,)
(Malcolm,Wagner,39,Artist,)
(Dolores,McLaughlin,60,Writer,)
(Francis,McNamara,47,Therapist,)
(Sandy,Raynor,26,Writer,)
(Marion,Moon,41,Carpenter,)
(Beth,Woodard,65,,)
(Julia,Desai,49,Musician,)
(Jerome,Wallace,52,Pharmacist,)
(Neal,Lawrence,72,Computersupport specialist,)
(Jean,Griffin,45,Childcareworker,)
(Kristine,Dougherty,63,Financialanalyst,)