MapReduce:基于物品的协同过滤算法的MapReduce实现

基于用户的相似性函数的定义:

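MapReduce:基于物品的协同过滤算法的MapReduce实现_第1张图片

(原文此处为公式图片,提取时已丢失。按下文代码的计算方式,物品 $i$ 与物品 $j$ 的相似度为:)

$$\mathrm{sim}(i,j)=\frac{\sum_{u\in U_{ij}}\left(R_{u,i}-\bar{R}_i\right)\left(R_{u,j}-\bar{R}_j\right)}{\sqrt{\sum_{u\in U_{ij}}\left(R_{u,i}-\bar{R}_i\right)^{2}}\;\sqrt{\sum_{u\in U_{ij}}\left(R_{u,j}-\bar{R}_j\right)^{2}}}$$

其中 $U_{ij}$ 是同时对物品 $i$ 和物品 $j$ 评过分的用户集合,$R_{u,i}$ 是用户 $u$ 对物品 $i$ 的评分,$\bar{R}_i$ 是物品 $i$ 在所有评过分的用户上的平均评分。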

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;


/**
 * Single-process, file-backed imitation of a MapReduce job.
 *
 * <p>Subclasses override {@link #Mapper(String, String)} and
 * {@link #Reducer(String, Iterator)}. A run consists of {@link #IterMapper()}
 * (feeds every non-blank input line to the mapper), followed by
 * {@link #IterReducer()} (groups the mapper output by key and feeds each group
 * to the reducer).
 *
 * <p>Mapper output is staged in one fixed intermediate file, so two instances
 * must not run their map/reduce phases interleaved.
 */
public class MapperReduce {
    /** Intermediate file that holds the mapper output until the shuffle reads it. */
    private static final String MAP_RESULT_PATH = "F:\\map_result.txt";

    String input = "F:\\input.txt";
    String output = "F:\\output.txt";

    /**
     * Creates a job and removes stale files from a previous run: both the
     * intermediate mapper file and the given output file are deleted if present.
     *
     * @param input  path of the text file read by {@link #IterMapper()}
     * @param output path of the text file appended to by {@link #WriteReducer(String, String)}
     */
    public MapperReduce(String input, String output) {
        this.input = input;
        this.output = output;
        File staleMapResult = new File(MAP_RESULT_PATH);
        if (staleMapResult.exists()) {
            staleMapResult.delete();
        }
        // Reducer output is written in append mode, so an old file must go first.
        File staleOutput = new File(this.output);
        if (staleOutput.exists()) {
            staleOutput.delete();
        }
    }

    public void set_input(String input) {
        this.input = input;
    }

    public void set_output(String output) {
        this.output = output;
    }

    /**
     * Truncates the intermediate mapper file to zero length (creating it if needed).
     *
     * @throws IOException if the file cannot be opened for writing
     */
    public void clear_tmpfile() throws IOException {
        try (FileWriter writer = new FileWriter(MAP_RESULT_PATH)) {
            writer.write("");
        }
    }

    /**
     * Shuffle phase: groups the mapper output by key.
     *
     * <p>Each line of the intermediate file is split at its first space into
     * key and value; a line without a space contributes the value {@code " "}.
     * Lines whose key is the empty string (blank lines, or lines starting with
     * a space) are dropped.
     *
     * @return map from key to the values emitted for it, in file order; empty
     *         if the mapper has not written anything
     * @throws IOException if the intermediate file cannot be read
     */
    public Map<String, List<String>> shuff() throws IOException {
        Map<String, List<String>> grouped = new HashMap<String, List<String>>();
        // The constructor deletes the intermediate file, so it is absent
        // whenever the mapper emitted no record; treat that as "no groups".
        if (!new File(MAP_RESULT_PATH).exists()) {
            return grouped;
        }
        try (BufferedReader reader = new BufferedReader(new FileReader(MAP_RESULT_PATH))) {
            String line;
            while ((line = reader.readLine()) != null) {
                String[] content = line.split(" ", 2);
                List<String> values = grouped.get(content[0]);
                if (values == null) {
                    values = new ArrayList<String>();
                    grouped.put(content[0], values);
                }
                values.add(content.length == 2 ? content[1] : " ");
            }
        }
        grouped.remove("");
        return grouped;
    }

    /**
     * Map phase: reads the input file line by line and calls
     * {@link #Mapper(String, String)} for every line that is not blank.
     * The key passed to the mapper is the 1-based line number (blank lines
     * are counted), the value is the unmodified line.
     *
     * @throws IOException if the input file cannot be read or the mapper fails
     */
    public void IterMapper() throws IOException {
        try (BufferedReader reader = new BufferedReader(new FileReader(this.input))) {
            String line;
            int lineNumber = 1;
            while ((line = reader.readLine()) != null) {
                if (!line.trim().isEmpty()) {
                    Mapper(String.valueOf(lineNumber), line);
                }
                lineNumber += 1;
            }
        }
    }

    /**
     * Reduce phase: runs {@link #shuff()} and calls
     * {@link #Reducer(String, Iterator)} once per key with an iterator over
     * that key's values.
     *
     * @throws IOException if the shuffle or the reducer fails
     */
    public void IterReducer() throws IOException {
        for (Map.Entry<String, List<String>> group : shuff().entrySet()) {
            Reducer(group.getKey(), group.getValue().iterator());
        }
    }

    /**
     * Appends one mapper record, {@code new_key + " " + new_value}, to the
     * intermediate file. The key must not contain a space because the shuffle
     * splits each line at the first space.
     *
     * @throws IOException if the intermediate file cannot be written
     */
    public void WriteMaper(String new_key, String new_value) throws IOException {
        appendRecord(MAP_RESULT_PATH, new_key, new_value);
    }

    /**
     * Appends one reducer record, {@code new_key + " " + new_value}, to the
     * output file.
     *
     * @throws IOException if the output file cannot be written
     */
    public void WriteReducer(String new_key, String new_value) throws IOException {
        appendRecord(this.output, new_key, new_value);
    }

    /** Appends "key value\r\n" to the file at {@code path}, closing it even on failure. */
    private static void appendRecord(String path, String key, String value) throws IOException {
        try (FileWriter writer = new FileWriter(path, true)) {
            writer.write(key + " " + value + "\r\n");
        }
    }

    /**
     * Map hook; the default implementation emits nothing.
     *
     * @param key   1-based line number of the input line, as a string
     * @param value the input line
     */
    public void Mapper(String key, String value) throws IOException {
    }

    /**
     * Reduce hook; the default implementation emits nothing.
     *
     * @param key   a key produced by the mapper
     * @param value iterator over every value the mapper emitted for {@code key}
     */
    public void Reducer(String key, Iterator<String> value) throws IOException {
    }
}


import java.io.IOException;  
import java.util.ArrayList;
import java.util.Iterator;  
import java.util.List;
 
  
  
public class MapReduce0 extends MapperReduce{  
  
    public MapReduce0(String input,String output){  
        super(input,output);  
    }  
       
      
    //map函数开始  
        public void Mapper(String key,String value) throws IOException{  
            //key为行偏移量,value为每一行的值  
        	String[] content=value.split(" ",2);
        	WriteMaper(content[0],content[1]);
        }  
    //map函数结束  
          
    //reduce函数开始      
        public void Reducer(String key,Iterator value) throws IOException{  
        	List list=new ArrayList();
        	float sum=0;
        	int count=0;
        	while(value.hasNext()){
        		String tmp=value.next();
        		list.add(tmp);
        		sum +=Float.parseFloat(tmp.split(" ")[1]); 
        		count +=1;
        	}
        	float avg=sum/count;
        	for(int i=0;i
 
  
import java.io.IOException;  
import java.util.ArrayList;  
import java.util.Iterator;  
import java.util.List;  
 
  
  
public class MapReduce1 extends MapperReduce{  
  
    public MapReduce1(String input,String output){  
        super(input,output);  
    }   
      
    public void printf(List list1,List list2) throws IOException{  
        if((!list1.isEmpty()) && (!list2.isEmpty())){  
            Iterator Iter1=list1.iterator();  
            while(Iter1.hasNext()){  
                String tmp1=Iter1.next();  
                Iterator Iter2=list2.iterator();  
                while(Iter2.hasNext()){  
                    String tmp2=Iter2.next(); 
                    if(!tmp1.split(" ")[0].equals(tmp2.split(" ")[0]))
                          {WriteReducer(tmp1,tmp2);}
                }  
            }  
        }  
    }  
      
    //map函数开始  
        public void Mapper(String key,String value) throws IOException{  
            //key为行偏移量,value为每一行的值  
            String[] count=value.split(" ");  
            if(!count[1].trim().equals("")){  
                WriteMaper(count[1].trim(),count[0].trim()+" "+count[2].trim()+" "+"1");  
                WriteMaper(count[1].trim(),count[0].trim()+" "+count[2].trim()+" "+"2");  
            }  
        }  
    //map函数结束  
          
    //reduce函数开始      
        public void Reducer(String key,Iterator value) throws IOException{  
            List list1=new ArrayList();  
            List list2=new ArrayList();  
            while(value.hasNext()){  
                String tmp=value.next();  
                String[] tmp1=tmp.split(" ",2); 
                if(tmp1[1].split(" ")[1].equals("1")){  
                    list1.add(tmp1[0]+" "+tmp1[1].split(" ")[0]);  
                }else{  
                    list2.add(tmp1[0]+" "+tmp1[1].split(" ")[0]); 
                }  
            }  
            printf(list1,list2);  
        }  
    //reduce函数结束   
}
 
  
import java.io.IOException;  
import java.util.Iterator;  
 
  
  
public class MapReduce2 extends MapperReduce{  
  
    public MapReduce2(String input,String output){  
        super(input,output);  
    }  
       
      
    //map函数开始  
        public void Mapper(String key,String value) throws IOException{  
            //key为行偏移量,value为每一行的值  
        	String[] content=value.split(" ");
        	//注意new_key中不要有空格,因为shuffle中是以第一个空格为单位进行划分key和value的
        	WriteMaper(content[0]+","+content[2],content[1]+" "+content[3]);
        }  
    //map函数结束  
          
    //reduce函数开始      
        public void Reducer(String key,Iterator value) throws IOException{  
        	float p = 0,p1=0,p2=0,tmp1=0,tmp2=0;
        	while(value.hasNext()){
        		String tmp=value.next();
        		tmp1=Float.parseFloat(tmp.split(" ")[0]);
        		tmp2=Float.parseFloat(tmp.split(" ")[1]);
        		p=p+tmp1*tmp2;
        		p1=p1+tmp1*tmp1;
        		p2=p2+tmp2*tmp2;
        	}
        	String new_value=String.valueOf(p/(Math.sqrt(p1)*Math.sqrt(p2)));
        	WriteReducer(key,new_value);
        }  
    //reduce函数结束  

        public static void main(String[] args) throws IOException {
            MapReduce0 a0=new MapReduce0("F:\\input.txt","F:\\output_tmp.txt"); 
            a0.IterMapper();  
            a0.IterReducer();  
            a0.clear_tmpfile();
            MapReduce1 a1=new MapReduce1("F:\\output_tmp.txt","F:\\output_tmp1.txt"); 
            a1.IterMapper();  
            a1.IterReducer();  
            a1.clear_tmpfile();
            MapReduce2 a2=new MapReduce2("F:\\output_tmp1.txt","F:\\output.txt"); 
            a2.IterMapper();  
            a2.IterReducer();  
        }      
}


 
  


测试,输入:

item1 user1 6
item1 user2 5
item1 user3 4
item2 user1 6
item2 user2 5


输出:

item1,item2 0.7071067811865475
item2,item1 0.7071067811865475





你可能感兴趣的:(MapReduce:基于物品的协同过滤算法的MapReduce实现)