查看一下简单的文本内容
grunt> cat A;
0,1,2
1,3,4
grunt> a = load 'A' using PigStorage(',') as (c1:int,c2:double,c3:float);
grunt> b = foreach a generate $0+$1 as c1_c2;
grunt> dump b;
(1.0)
(4.0)
grunt> c = foreach b generate c1_c2-1;
2013-09-26 16:11:13,124 [main] WARN org.apache.pig.PigServer - Encountered Warning IMPLICIT_CAST_TO_DOUBLE 4 time(s).
grunt> dump c;
(0.0)
(3.0)
grunt> describe c;
2013-09-26 16:44:10,247 [main] WARN org.apache.pig.PigServer - Encountered Warning IMPLICIT_CAST_TO_DOUBLE 4 time(s).
c: {double}
grunt> d = foreach c generate $0,100/10,100%10;
2013-09-26 16:44:54,994 [main] WARN org.apache.pig.PigServer - Encountered Warning IMPLICIT_CAST_TO_DOUBLE 4 time(s).
grunt> dump d;
(0.0,10,0)
(3.0,10,0)
Details at logfile:/home/pig/pig-0.11.0/logs
grunt> e = foreach d generate($0==0?$1:$2);
grunt> dump e;
(10)
(0)
grunt> f = filter a by c1>0 or c2>=1 and c1<3 or c3 is not null;
grunt> dump f;
(0,1.0,2.0)
(1,3.0,4.0)
grunt> cat studentOut
1:xiaohouzi:25
2:xiaohouzi2:24
3:xiaohouzi3:23
grunt> a = load 'studentOut' using PigStorage(':') as (c1:long,c2:chararray,c3:int);
grunt> b = filter a by c1!=1 and c3>=24 and (c2 matches '.*hou.*') ;
grunt> dump b;
(2,xiaohouzi2,24)
关于正则匹配模式matches
http://docs.oracle.com/javase/1.5.0/docs/api/java/util/regex/Pattern.html
Tuple 用法
grunt> cat tuple.txt
(1,2,3)
(2,3,4)
(3,4,5)
(2,3,5)
grunt> a = load 'tuple.txt' as t1:tuple(c1:int,c2:int,c3:int);
grunt> b = foreach a generate t1.c1,t1.c2,t1.c3;
grunt> dump b;
(1,2,3)
(2,3,4)
(3,4,5)
(2,3,5)
Bag用法
grunt> a = load 'A' using PigStorage(',') as (c1:int,c2:double,c3:float);
grunt> b = group a by(c1,c2);
grunt> ILLUSTRATE b;
------------------------------------------------------------------------------------------------------------
| a | c1:int | c2:double | c3:float |
-----------------------------------------------------
| | 1 | 3.0 | 4.0 |
| | 1 | 3.0 | 4.0 |
-----------------------------------------------------
--------------------------------------------------------------------------------------------------------------------
| b | group:tuple(c1:int,c2:double) | a:bag{:tuple(c1:int,c2:double,c3:float)} |
--------------------------------------------------------------------------------------------------------------------
| | (1, 3.0) | {(1, 3.0,4.0), (1, 3.0, 4.0)} |
grunt> dump b;
((0,1.0),{(0,1.0,2.0)})
((1,3.0),{(1,3.0,4.0)})
Map用法
grunt> cat map.txt
1,[open#apache]
2,[apache#hadoop]
3,[hadoop#pig]
4,[pig#grunt]
grunt> a = load 'map.txt' using PigStorage(',') as (id:int,m1:map[]);
grunt> b = foreach a generate m1#'pig';
grunt> dump b;
()
()
()
(grunt)