MapReduce要实现两个函数:Map和Reduce。Map函数遍历集合中的所有记录,对每条记录调用emit(key,value),按key进行分组(group by),然后将key与对应的values传给Reduce函数进行处理,输出结果。
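db.collection.mapReduce(
   <map>,
   <reduce>,
   {
     out: <collection>,
     query: <document>,
     sort: <document>,
     limit: <number>,
     finalize: <function>,
     scope: <document>,
     jsMode: <boolean>,
     verbose: <boolean>
   }
)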
【map函数】
map是JavaScript函数,负责将每一个输入文档转换为零个或多个键值对(通过emit输出),按key进行分组后生成键值对序列,作为reduce函数的参数。key用于对文档进行分组,value是要统计的数据,value可以是JSON对象(单次emit的数据不能超过MongoDB最大BSON文档大小的一半)。下面我们根据订单明细统计每种产品卖出了多少个:先通过pnumber进行分组,然后再对quantity求和,相当于 select pnumber,sum(quantity) from item group by pnumber
// Seed the `item` collection with eight order-line documents.
// Each document carries the quantity sold, the unit price and the product number.
db.item.insert([
  { "quantity": 2,  "price": 5.0,  "pnumber": "p003" },
  { "quantity": 2,  "price": 8.0,  "pnumber": "p002" },
  { "quantity": 1,  "price": 4.0,  "pnumber": "p002" },
  { "quantity": 2,  "price": 4.0,  "pnumber": "p001" },
  { "quantity": 4,  "price": 10.0, "pnumber": "p003" },
  { "quantity": 10, "price": 20.0, "pnumber": "p001" },
  { "quantity": 10, "price": 20.0, "pnumber": "p003" },
  { "quantity": 5,  "price": 10.0, "pnumber": "p002" }
])
// Map step: runs once per item document (bound to `this`) and emits a
// { pumber, quantity } value keyed by the product number.
// The emitted value has the same shape as reduce's return value on purpose:
// MongoDB may feed reduce's own output back into reduce, and it does not call
// reduce at all for keys that have a single value.
var map = function() {
  emit(this.pnumber, { 'pumber': this.pnumber, 'quantity': this.quantity });
};

// Reduce step: adds up the quantity of every value collected for `key`.
// @param key    product number shared by all entries in `values`
// @param values array of { pumber, quantity } objects (from map or an earlier reduce)
// @returns      { pumber, quantity } with quantity summed over `values`
var reduce = function(key, values) {
  var total = 0;
  values.forEach(function(value) {
    total += value.quantity;
  });
  return { 'pumber': key, 'quantity': total };
};
> db.item.mapReduce( map,
reduce,
{ out: "map_reduce_data" }
)
> db.map_reduce_data.find()
{ "_id" : "p001", "value" : { "pumber" : "p001", "quantity" : 12 } }
{ "_id" : "p002", "value" : { "pumber" : "p002", "quantity" : 8 } }
{ "_id" : "p003", "value" : { "pumber" : "p003", "quantity" : 16 } }
【query过滤的条件】
只有符合条件的文档才会执行map函数(query、limit、sort可以随意组合)。下面我们只统计订单明细中单次卖出数量大于5的记录,得到每种产品卖出了多少个:先用query过滤,再通过pnumber进行分组,然后对quantity求和,相当于 select pnumber,sum(quantity) from item where quantity>5 group by pnumber
// Map step: emits the quantity of the current item document (bound to `this`),
// keyed by its product number.
var map = function() {
  emit(this.pnumber, this.quantity);
};

// Reduce step: sums the quantities collected for `key`.
// Returns a plain number, i.e. the same type map emits: MongoDB skips reduce
// for keys with a single value and may re-run reduce on its own output, so a
// different return type would give mixed or broken results.
// @param key    product number (unused; the result is stored under it as _id)
// @param values array of numbers emitted by map or returned by an earlier reduce
// @returns      the numeric sum of `values`
var reduce = function(key, values) {
  return Array.sum(values);
};
> db.item.mapReduce( map,
reduce,
{ query:{'quantity':{$gt:5}},
out: "map_reduce_data" } )
{
"result" : "map_reduce_data",
"timeMillis" : 5,
"counts" : {
"input" : 2,
"emit" : 2,
"reduce" : 0,
"output" : 2
},
"ok" : 1
}
> db.map_reduce_data.find()
{ "_id" : "p001", "value" : 10 }
{ "_id" : "p003", "value" : 10 }
// Map step: emits one { pumber, count: 1 } value per item document, keyed by
// product number. The value has the same shape reduce returns, so keys that
// have a single document (for which reduce is not invoked) are stored in the
// same format as reduced keys.
var map = function() {
  emit(this.pnumber, { 'pumber': this.pnumber, "count": 1 });
};

// Reduce step: counts the documents per product by adding up the `count`
// field of every collected value.
// @param key    product number shared by all entries in `values`
// @param values array of { pumber, count } objects (from map or an earlier reduce)
// @returns      { pumber, count } with count summed over `values`
var reduce = function(key, values) {
  var count = 0;
  values.forEach(function(val) {
    count += val.count;
  });
  return { 'pumber': key, "count": count };
};
> db.item.mapReduce( map,
reduce,
{ out: "map_reduce_data" }
)
{
"result" : "map_reduce_data",
"timeMillis" : 6,
"counts" : {
"input" : 8,
"emit" : 8,
"reduce" : 3,
"output" : 3
},
"ok" : 1
}
> db.map_reduce_data.find()
{ "_id" : "p001", "value" : { "pumber" : "p001", "count" : 2 } }
{ "_id" : "p002", "value" : { "pumber" : "p002", "count" : 3 } }
{ "_id" : "p003", "value" : { "pumber" : "p003", "count" : 3 } }
下面统计订单(orders)中每种产品销售了多少个:订单明细保存在item数组中,我们在map中遍历该数组,先通过pnumber进行分组,然后再对quantity求和。
// Seed the `orders` collection with four orders.
// Every order has an order number and an `item` array of order lines
// (quantity, unit price, product number).
db.orders.insert([
  {
    "onumber": "001",
    "item": [
      { "quantity": 2, "price": 5.0, "pnumber": "p003" },
      { "quantity": 2, "price": 8.0, "pnumber": "p002" }
    ]
  },
  {
    "onumber": "002",
    "item": [
      { "quantity": 1, "price": 4.0,  "pnumber": "p002" },
      { "quantity": 2, "price": 4.0,  "pnumber": "p001" },
      { "quantity": 4, "price": 10.0, "pnumber": "p003" }
    ]
  },
  {
    "onumber": "003",
    "item": [
      { "quantity": 10, "price": 20.0, "pnumber": "p001" },
      { "quantity": 10, "price": 20.0, "pnumber": "p003" }
    ]
  },
  {
    "onumber": "004",
    "item": [
      { "quantity": 5, "price": 10.0, "pnumber": "p002" }
    ]
  }
])
// Map step: walks the `item` array of the current order (bound to `this`) and
// emits one { pumber, quantity } value per order line, keyed by product number.
// The emitted value has the same shape as reduce's return value because
// MongoDB may pass reduce's own output back into reduce, and does not call
// reduce for keys with a single value.
var map = function() {
  this.item.forEach(function(it) {
    emit(it.pnumber, { 'pumber': it.pnumber, 'quantity': it.quantity });
  });
};

// Reduce step: adds up the quantity of every value collected for `key`.
// @param key    product number shared by all entries in `values`
// @param values array of { pumber, quantity } objects (from map or an earlier reduce)
// @returns      { pumber, quantity } with quantity summed over `values`
var reduce = function(key, values) {
  var total = 0;
  values.forEach(function(value) {
    total += value.quantity;
  });
  return { 'pumber': key, 'quantity': total };
};
> db.orders.mapReduce( map,
reduce,
{ out: "map_reduce_data" } )
{
"result" : "map_reduce_data",
"timeMillis" : 51,
"counts" : {
"input" : 4,
"emit" : 8,
"reduce" : 3,
"output" : 3
},
"ok" : 1
}
> db.map_reduce_data.find()
{ "_id" : "p001", "value" : { "pumber" : "p001", "quantity" : 12 } }
{ "_id" : "p002", "value" : { "pumber" : "p002", "quantity" : 8 } }
{ "_id" : "p003", "value" : { "pumber" : "p003", "quantity" : 16 } }
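reduce是JavaScript函数,对map操作的输出做合并化简(将key-values变成key-value,也就是把values数组变成一个单一的值value)。注意:reduce的返回值类型必须与map中emit的value类型一致,因为对同一个key,reduce可能被多次调用,上一次reduce的结果会作为values中的元素再次传入;而只有一个value的key则不会调用reduce。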
// NOTE(review): the original first line fused what looks like a truncated
// reduce prototype, `function(key, values) { ... return result; }`, with the
// map definition; only the map definition is kept as code here.

// Map step: emits { count: 1, quantity } for the current item document
// (bound to `this`), keyed by its product number.
var map = function() {
  var value = { count: 1, quantity: this.quantity };
  emit(this.pnumber, value);
};

// Reduce step: accumulates both the number of documents and the total
// quantity for `key`. The result has the same shape as the values map emits.
// @param key    product number (not used in the computation)
// @param values array of { count, quantity } objects
// @returns      { count, quantity } summed field-by-field over `values`
var reduce = function(key, values) {
  var reducedVal = { count: 0, quantity: 0 };
  values.forEach(function(value) {
    reducedVal.count += value.count;
    reducedVal.quantity += value.quantity;
  });
  return reducedVal;
};
{
"result" : "map_reduce_data",
"timeMillis" : 7,
"counts" : {
"input" : 8,
"emit" : 8,
"reduce" : 3,
"output" : 3
},
"ok" : 1
}
> db.map_reduce_data.find()
{ "_id" : "p001", "value" : { "count" : 2, "quantity" : 12 } }
{ "_id" : "p002", "value" : { "count" : 3, "quantity" : 8 } }
{ "_id" : "p003", "value" : { "count" : 3, "quantity" : 16 } }
{
"result" : "map_reduce_data",
"timeMillis" : 7,
"counts" : {
"input" : 8,
"emit" : 8,
"reduce" : 3,
"output" : 3
},
"ok" : 1
}