//例子1函数准备: function map(){ emit(this.name,{count:1}); } > function reduce(key,value){ ... var result = {count:0}; ... for(var i=0;i<value.length;i++){ ... result.count += value[i].count; ... } ... result.ext = "haha"; ... return result; ... } //对比例子1-1: > db.person.find() { "_id" : ObjectId("566e89f8382a8419511d02ab"), "name" : "hxc", "age" : 20 } { "_id" : ObjectId("566e8a0c382a8419511d02ac"), "name" : "hxc", "age" : 24 } { "_id" : ObjectId("566e8a1c382a8419511d02ad"), "name" : "zjl", "age" : 34 } { "_id" : ObjectId("566e8a1c382a8419511d02ad"), "name" : "zjl", "age" : 26 } { "_id" : ObjectId("566e8a32382a8419511d02ae"), "name" : "xx", "age" : 32 } { "_id" : ObjectId("566e8a42382a8419511d02af"), "name" : "abcdweb", "age" : 32 } { "_id" : ObjectId("566e8a52382a8419511d02b0"), "name" : "abcdweb", "age" : 32 } { "_id" : ObjectId("566e8a5f382a8419511d02b1"), "name" : "xx", "age" : 32 } > db.person.mapReduce(map,reduce,{"out":"collection"}) { "result" : "collection", "timeMillis" : 285, "counts" : { "input" : 8, "emit" : 8, "reduce" : 4, //此值为4 "output" : 4 }, "ok" : 1 } > db.collection.find() { "_id" : "abcdweb", "value" : { "count" : 2, "ext" : "haha" } } { "_id" : "hxc", "value" : { "count" : 2, "ext" : "haha" } } { "_id" : "xx", "value" : { "count" : 2, "ext" : "haha" } } { "_id" : "zjl", "value" : { "count" : 2, "ext" : "haha" } } //对比例子1-2: > db.person.find() { "_id" : ObjectId("566e8a1c382a8419511d02ad"), "name" : "zjl", "age" : 34 } { "_id" : ObjectId("566e8a32382a8419511d02ae"), "name" : "xx", "age" : 32 } { "_id" : ObjectId("566e8a5f382a8419511d02b1"), "name" : "xx", "age" : 32 } { "_id" : ObjectId("566e8fdd382a8419511d02b2"), "name" : "zjl", "age" : 26 } { "_id" : ObjectId("566e90a4382a8419511d02b3"), "name" : "hxc", "age" : 20 } { "_id" : ObjectId("566e90bd382a8419511d02b4"), "name" : "abcdweb", "age" : 32 } > db.person.mapReduce(map,reduce,{"out":"collection"}) { "result" : "collection", "timeMillis" : 271, "counts" : { "input" : 6, "emit" : 6, "reduce" : 
2, //此值为2 "output" : 4 }, "ok" : 1 } > db.collection.find() { "_id" : "abcdweb", "value" : { "count" : 1 } } { "_id" : "hxc", "value" : { "count" : 1 } } //留意,此行和上一行均没有“ext”字段,表明该两组并没有执行reduce函数;value为emit的value { "_id" : "xx", "value" : { "count" : 2, "ext" : "haha" } } { "_id" : "zjl", "value" : { "count" : 2, "ext" : "haha" } } > 总结:map是用来分组的,具体实现是通过emit,emit的key则为分组字段,其value的集合(即数组)会传给reduce函数执行。若value集合长度小于2,则该分组不会传到reduce函数执行(因为reduce的字面意思是“简化”,是将集合数据进行精简,统计获取一个结果;而若集合只有一条数据,根本就不需要统计了)。
更多用法,参考:
MongoDB:MapReduce基础及实例
MongoDB MapReduce学习笔记