基于用户的相似性函数的定义:
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
/**
 * Base class for a small single-process, file-backed map / shuffle / reduce pipeline.
 *
 * <p>{@link #IterMapper()} feeds every non-blank line of the input file to
 * {@link #Mapper(String, String)}; mappers emit records through
 * {@link #WriteMaper(String, String)} into an intermediate file; {@link #shuff()} groups
 * those records by key; {@link #IterReducer()} hands each group to
 * {@link #Reducer(String, Iterator)}, which emits results through
 * {@link #WriteReducer(String, String)} into the output file.
 *
 * <p>{@code Mapper} and {@code Reducer} do nothing here; subclasses override them.
 *
 * <p>NOTE(review): all file access uses {@link FileReader}/{@link FileWriter}, i.e. the
 * platform default charset.
 */
public class MapperReduce {
    /** Default path of the intermediate file that holds mapper output. */
    private static final String DEFAULT_TMP_FILE = "F:\\map_result.txt";

    String input = "F:\\input.txt";
    String output = "F:\\output.txt";
    /** Path of the intermediate (mapper output) file; defaults to {@code F:\map_result.txt}. */
    private String tmpfile = DEFAULT_TMP_FILE;

    /**
     * Creates a job and removes leftovers of a previous run: the intermediate file and the
     * output file are deleted if they already exist.
     *
     * @param input  path of the text file read by {@link #IterMapper()}
     * @param output path of the text file appended to by {@link #WriteReducer(String, String)}
     */
    public MapperReduce(String input, String output) {
        this.input = input;
        this.output = output;
        File staleTmp = new File(this.tmpfile);
        if (staleTmp.exists()) {
            staleTmp.delete();
        }
        File staleOutput = new File(this.output);
        if (staleOutput.exists()) {
            staleOutput.delete();
        }
    }

    /** Sets the path of the input file. */
    public void set_input(String input) {
        this.input = input;
    }

    /** Sets the path of the output file. */
    public void set_output(String output) {
        this.output = output;
    }

    /**
     * Sets the path of the intermediate file used between the map and reduce phases.
     * When never called, the historical default {@code F:\map_result.txt} is used.
     */
    public void set_tmpfile(String tmpfile) {
        this.tmpfile = tmpfile;
    }

    /**
     * Truncates the intermediate file to zero length (creating it if necessary).
     *
     * @throws IOException if the file cannot be opened or written
     */
    public void clear_tmpfile() throws IOException {
        // Opening without the append flag truncates the file; nothing else needs writing.
        try (FileWriter fw = new FileWriter(tmpfile)) {
            fw.write("");
        }
    }

    /**
     * Shuffle phase: reads the intermediate file and merges records that share a key.
     *
     * <p>Each line is split at its first space into key and value. A line without any space
     * contributes the single-space string {@code " "} as its value. Records whose key is the
     * empty string (for example blank lines) are discarded.
     *
     * @return map from key to the values seen for that key, in file order
     * @throws IOException if the intermediate file cannot be read
     */
    public Map<String, List<String>> shuff() throws IOException {
        Map<String, List<String>> grouped = new HashMap<>();
        try (BufferedReader br = new BufferedReader(new FileReader(tmpfile))) {
            String line;
            while ((line = br.readLine()) != null) {
                String[] content = line.split(" ", 2);
                String value = (content.length == 2) ? content[1] : " ";
                List<String> values = grouped.get(content[0]);
                if (values == null) {
                    values = new ArrayList<>();
                    grouped.put(content[0], values);
                }
                values.add(value);
            }
        }
        // Drop records whose key is the empty string.
        grouped.remove("");
        return grouped;
    }

    /**
     * Map phase: reads the input file line by line and calls
     * {@link #Mapper(String, String)} for every line that is not blank. The key passed to
     * the mapper is the 1-based line number (blank lines still advance the counter).
     *
     * @throws IOException if the input cannot be read or the mapper fails
     */
    public void IterMapper() throws IOException {
        try (BufferedReader br = new BufferedReader(new FileReader(this.input))) {
            String line;
            int count = 1;
            while ((line = br.readLine()) != null) {
                if (!line.trim().equals("")) {
                    Mapper(String.valueOf(count), line);
                }
                count += 1;
            }
        }
    }

    /**
     * Reduce phase: runs {@link #shuff()} and calls {@link #Reducer(String, Iterator)} once
     * per key with an iterator over that key's values.
     *
     * @throws IOException if shuffling or reducing fails
     */
    public void IterReducer() throws IOException {
        for (Map.Entry<String, List<String>> group : shuff().entrySet()) {
            Reducer(group.getKey(), group.getValue().iterator());
        }
    }

    /**
     * Appends one mapper record, {@code new_key + " " + new_value}, to the intermediate file.
     * The key must not contain a space because {@link #shuff()} splits at the first space.
     *
     * @throws IOException if the intermediate file cannot be written
     */
    public void WriteMaper(String new_key, String new_value) throws IOException {
        try (FileWriter fw = new FileWriter(tmpfile, true)) {
            fw.write(new_key + " " + new_value + "\r\n");
        }
    }

    /**
     * Appends one reducer record, {@code new_key + " " + new_value}, to the output file.
     *
     * @throws IOException if the output file cannot be written
     */
    public void WriteReducer(String new_key, String new_value) throws IOException {
        try (FileWriter fw = new FileWriter(this.output, true)) {
            fw.write(new_key + " " + new_value + "\r\n");
        }
    }

    /**
     * Map function; intentionally empty here, to be overridden by subclasses.
     *
     * @param key   1-based line number of the input line, as a string
     * @param value the full text of the input line
     */
    public void Mapper(String key, String value) throws IOException {
    }

    /**
     * Reduce function; intentionally empty here, to be overridden by subclasses.
     *
     * @param key   a key produced by the map phase
     * @param value iterator over all values recorded for {@code key}
     */
    public void Reducer(String key, Iterator<String> value) throws IOException {
    }
}
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
public class MapReduce0 extends MapperReduce{
/**
 * Creates the job by delegating to the parent constructor.
 *
 * @param input  path of the input text file
 * @param output path of the output text file
 */
public MapReduce0(String input, String output) {
    super(input, output);
}
//map函数开始
/**
 * Map function: splits the line at its first space and emits the leading token as the
 * new key with the remainder of the line as the value.
 *
 * @param key   line offset of the record (not used)
 * @param value full text of the input line; must contain at least one space
 */
public void Mapper(String key, String value) throws IOException {
    final String[] parts = value.split(" ", 2);
    final String head = parts[0];
    final String rest = parts[1];
    WriteMaper(head, rest);
}
//map函数结束
//reduce函数开始
public void Reducer(String key,Iterator value) throws IOException{
List list=new ArrayList();
float sum=0;
int count=0;
while(value.hasNext()){
String tmp=value.next();
list.add(tmp);
sum +=Float.parseFloat(tmp.split(" ")[1]);
count +=1;
}
float avg=sum/count;
for(int i=0;i
import java.io.IOException; import java.util.ArrayList; import java.util.Iterator; import java.util.List; public class MapReduce1 extends MapperReduce{ public MapReduce1(String input,String output){ super(input,output); } public void printf(Listlist1,List list2) throws IOException{ if((!list1.isEmpty()) && (!list2.isEmpty())){ Iterator Iter1=list1.iterator(); while(Iter1.hasNext()){ String tmp1=Iter1.next(); Iterator Iter2=list2.iterator(); while(Iter2.hasNext()){ String tmp2=Iter2.next(); if(!tmp1.split(" ")[0].equals(tmp2.split(" ")[0])) {WriteReducer(tmp1,tmp2);} } } } } //map函数开始 public void Mapper(String key,String value) throws IOException{ //key为行偏移量,value为每一行的值 String[] count=value.split(" "); if(!count[1].trim().equals("")){ WriteMaper(count[1].trim(),count[0].trim()+" "+count[2].trim()+" "+"1"); WriteMaper(count[1].trim(),count[0].trim()+" "+count[2].trim()+" "+"2"); } } //map函数结束 //reduce函数开始 public void Reducer(String key,Iterator value) throws IOException{ List list1=new ArrayList (); List list2=new ArrayList (); while(value.hasNext()){ String tmp=value.next(); String[] tmp1=tmp.split(" ",2); if(tmp1[1].split(" ")[1].equals("1")){ list1.add(tmp1[0]+" "+tmp1[1].split(" ")[0]); }else{ list2.add(tmp1[0]+" "+tmp1[1].split(" ")[0]); } } printf(list1,list2); } //reduce函数结束 }
import java.io.IOException; import java.util.Iterator; public class MapReduce2 extends MapperReduce{ public MapReduce2(String input,String output){ super(input,output); } //map函数开始 public void Mapper(String key,String value) throws IOException{ //key为行偏移量,value为每一行的值 String[] content=value.split(" "); //注意new_key中不要有空格,因为shuffle中是以第一个空格为单位进行划分key和value的 WriteMaper(content[0]+","+content[2],content[1]+" "+content[3]); } //map函数结束 //reduce函数开始 public void Reducer(String key,Iteratorvalue) throws IOException{ float p = 0,p1=0,p2=0,tmp1=0,tmp2=0; while(value.hasNext()){ String tmp=value.next(); tmp1=Float.parseFloat(tmp.split(" ")[0]); tmp2=Float.parseFloat(tmp.split(" ")[1]); p=p+tmp1*tmp2; p1=p1+tmp1*tmp1; p2=p2+tmp2*tmp2; } String new_value=String.valueOf(p/(Math.sqrt(p1)*Math.sqrt(p2))); WriteReducer(key,new_value); } //reduce函数结束 public static void main(String[] args) throws IOException { MapReduce0 a0=new MapReduce0("F:\\input.txt","F:\\output_tmp.txt"); a0.IterMapper(); a0.IterReducer(); a0.clear_tmpfile(); MapReduce1 a1=new MapReduce1("F:\\output_tmp.txt","F:\\output_tmp1.txt"); a1.IterMapper(); a1.IterReducer(); a1.clear_tmpfile(); MapReduce2 a2=new MapReduce2("F:\\output_tmp1.txt","F:\\output.txt"); a2.IterMapper(); a2.IterReducer(); } }
测试,输入:
item1 user1 6
item1 user2 5
item1 user3 4
item2 user1 6
item2 user2 5
输出:
item1,item2 0.7071067811865475
item2,item1 0.7071067811865475