向MapReduce转换:生成用户向量

分两部分:

/**
 * @author YangXin
 * @date 2016/2/21
 * @info 主要功能是mahout实现解析Wikipedia链接文件的Mapper接口
 */
package unitSix;
import java.io.IOException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.mahout.math.VarLongWritable;

public class WikipediaToItemPrefsMapper extends Mapper{
	private static final Pattern NUMBERS = Pattern.compile("(\\d+)");
	public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException{
		String line = value.toString();
		Matcher m = NUMBERS.matcher(line);
		//定位用户ID
		m.find();                              
		VarLongWritable 

你可能感兴趣的:(大数据挖掘与大数据应用案例,Java)