hadoop_2 — lab notes on Sqoop import, Mahout item-based recommendations, Hive, and Pig

Hadoop MapReduce (MRv1) installation directory: /usr/lib/hadoop-0.20-mapreduce

# List the tables in the MySQL 'movielens' database via Sqoop.
# NOTE(review): a plaintext --password on the command line is visible in shell
# history and `ps`; prefer -P (interactive prompt) or --password-file outside a lab.
sqoop list-tables --connect jdbc:mysql://localhost/movielens --username training --password training

# Import the 'movie' table into HDFS as tab-separated text files (movie/part-m-*).
sqoop import --connect jdbc:mysql://localhost/movielens --table movie --fields-terminated-by '\t' --username training --password training

# Import the 'movierating' table into HDFS the same way (movierating/part-m-*).
sqoop import --connect jdbc:mysql://localhost/movielens --table movierating --fields-terminated-by '\t' --username training --password training

# Show the last 1KB of the first map-task output file to sanity-check the import.
hadoop fs -tail movierating/part-m-00000

6037
6038
6039
6040


# Run Mahout's distributed item-based recommender over the imported ratings.
# --usersFile restricts output to the user ids listed in the HDFS file 'users';
# SIMILARITY_LOGLIKELIHOOD scores item co-occurrence (rating values matter less).
mahout recommenditembased --input movierating --output recs --usersFile users --similarityClassname SIMILARITY_LOGLIKELIHOOD
# Inspect the recommender output; format is: userid \t [movieid:score,movieid:score,...]
hadoop fs -tail recs/part-r-00000


# Peek at the first lines of each imported data set.
hadoop fs -cat movie/part-m-00000 | head

hadoop fs -cat movierating/part-m-00000 | head


-- External Hive table over the Sqoop-imported 'movie' data (no data copied;
-- Hive reads the tab-delimited files in place).
-- Fixed: the original declaration reused movierating's column list
-- (id, movieid, rating) and pointed LOCATION at /user/training/movierating;
-- the movie export is (id, name, year) under /user/training/movie.
CREATE EXTERNAL TABLE movie (
    id   INT,
    name STRING,
    year INT
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION '/user/training/movie';


-- External table over the tab-delimited Sqoop export of 'movierating';
-- dropping the table leaves the files under /user/training/movierating intact.
CREATE EXTERNAL TABLE movierating (
    userid  INT,
    movieid INT,
    rating  INT
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION '/user/training/movierating';

------------ failed: 'finallist' did a FOREACH over 'recs', but 'movieandscore' is only defined in 'longlist' ------------
-- Join Mahout's recommendation output back to movie titles.
-- Fixed: the original 'finallist' step iterated over 'recs', where the field
-- 'movieandscore' does not exist (it is produced by 'longlist'), which is why
-- the script failed. Also typed both LOADs so the JOIN compares int movieids
-- instead of chararray-to-bytearray.

-- movie export: id \t name \t year (PigStorage defaults to tab-delimited).
movies = LOAD 'movie' AS (movieid:int, name:chararray, year:int);

-- recommender output: userid \t [movieid:score,movieid:score,...]
recs = LOAD 'recs' AS (userid:int, reclist:chararray);

-- Explode the recommendation list into one (userid, "movieid:score") row per
-- token; TOKENIZE splits on whitespace and commas, so each list entry becomes
-- its own token.
longlist = FOREACH recs GENERATE userid,
    FLATTEN(TOKENIZE(reclist)) AS movieandscore;

-- Keep only the leading numeric movie id from each "movieid:score" token,
-- cast to int to match movies.movieid for the join.
finallist = FOREACH longlist GENERATE userid,
    (int)REGEX_EXTRACT(movieandscore, '(\\d+)', 1) AS movieid;

results = JOIN finallist BY movieid, movies BY movieid;

final = FOREACH results GENERATE userid, name;

srtd = ORDER final BY userid;

DUMP srtd;
-----------------------

other useful commands
# Show the last 50 lines of an HDFS file; replace [] with the HDFS path.
hadoop fs -cat [] | tail -n 50    
cd ~
# Decompress only the first 500 lines into file 'a' when the archive is large.
gunzip -c access_log.gz | head -n 500 > a    //if the zip file is too big, just get 500 lines






You may also be interested in: (hadoop)