mongo-mapreduce测试(10)——阶段总结(2)

 

mongo-mapreduce测试(1)——count/sum/where条件

 

mongo-mapreduce测试(2)——列转行与finalize函数

 

mongo-mapreduce测试(3)——group by having

 

mongo-mapreduce测试(4)——avg

 

mongo-mapreduce测试(5)——max/min

 

mongo-mapreduce测试(6)——综合测试

 

mongo-mapreduce测试(7)——使用js存储过程

 

mongo-mapreduce测试(8)——阶段总结(1)

 

mongo-mapreduce测试(9)——python调用

 

mongo-mapreduce测试(10)——阶段总结(2)

 

mongo-mapreduce测试(11)——跟踪调试

MR的输出结果集选项如下:

out: { <action>: <collectionName>

         [, db: <dbName>]

         [, sharded: <boolean> ]

         [, nonAtomic: <boolean> ] }

1. 对于action,有4个选项可供使用(默认情况下是replace)

{ replace : "collectionName" } - the output will be inserted into a collection which will atomically replace any existing collection with the same name.
{ merge : "collectionName" } - This option will merge new data into the old output collection. In other words, if the same key exists in both the result set and the old collection, the new key will overwrite the old one.
{ reduce : "collectionName" } - If documents exist for a given key in the result set and in the old collection, then a reduce operation (using the specified reduce function) will be performed on the two values and the result will be written to the output collection. If a finalize function was provided, this will be run after the reduce as well.
{ inline : 1} - With this option, no collection will be created, and the whole map-reduce operation will happen in RAM. Also, the results of the map-reduce will be returned within the result object. Note that this option is possible only when the result set fits within the 16MB limit of a single document.
默认情况下,MR每次都会先将结果表清空,然后再将数据插入该表(使用count功能进行测试)

> var map = function() {
... emit(this.name, 1);
... };
> var reduce = function(key, values) {
... return values.length;
... };
1.1. replace:执行MR,默认使用replace参数将结果集删除后重建
> db.tianyc_test3.mapReduce(map, reduce, {out: "tianyc_test3_result2"});
{
"result" : "tianyc_test3_result2",
"timeMillis" : 197,
"counts" : {
"input" : 11,
"emit" : 11,
"reduce" : 3,
"output" : 3
},
"ok" : 1,
}
> db.tianyc_test3_result2.find()
{ "_id" : "neu", "value" : 4 }
{ "_id" : "xtt", "value" : 5 }
{ "_id" : "yct", "value" : 2 }
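1.2 merge:修改测试数据。执行MR,使用merge参数将新结果更新到结果集中。(注:从下面MR输出的input为14(原有11条+新插入3条)可以看出,remove({_id:'yct'})实际上没有删除任何文档——源表中并没有_id为'yct'的文档,因此yct的计数仍然是2。)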
> db.tianyc_test3.insert({name:'neu',dic:10})
> db.tianyc_test3.insert({name:'xjbu',dic:1})
> db.tianyc_test3.insert({name:'xjbu',dic:2})
> db.tianyc_test3.remove({_id:'yct'})
> db.tianyc_test3.mapReduce(map, reduce, {out: {merge:"tianyc_test3_result2"}});
{
"result" : "tianyc_test3_result2",
"timeMillis" : 9,
"counts" : {
"input" : 14,
"emit" : 14,
"reduce" : 4,
"output" : 4
},
"ok" : 1,
}
> db.tianyc_test3_result2.find()
{ "_id" : "neu", "value" : 5 }
{ "_id" : "xtt", "value" : 5 }
{ "_id" : "yct", "value" : 2 }
{ "_id" : "xjbu", "value" : 2 }
>
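1.3 reduce:执行MR,使用reduce参数,将新结果与tianyc_test3_result2中已有的结果按key再次reduce,将最终结果更新到结果集中。注意:下面的结果全部变成了2,这是因为本例的reduce函数返回的是values.length,再次reduce时传入的是[旧值, 新值]两个元素,所以总是返回2。reduce函数必须能够对自己的输出重复执行;计数时应当对values求和(map发出1,reduce返回各值之和),才能得到正确的累加结果。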
> db.tianyc_test3.mapReduce(map, reduce, {out: {reduce:"tianyc_test3_result2"}});
{
"result" : "tianyc_test3_result2",
"timeMillis" : 245,
"counts" : {
"input" : 14,
"emit" : 14,
"reduce" : 4,
"output" : 4
},
"ok" : 1,
}
> db.tianyc_test3_result2.find()
{ "_id" : "neu", "value" : 2 }
{ "_id" : "xtt", "value" : 2 }
{ "_id" : "yct", "value" : 2 }
{ "_id" : "xjbu", "value" : 2 }
1.4 inline:执行MR,使用{inline:1}参数,将MR结果直接输出到屏幕上。
> db.tianyc_test3.mapReduce(map, reduce, {out: {inline : 1}});
{
"results" : [
{
"_id" : "neu",
"value" : 5
},
{
"_id" : "xjbu",
"value" : 2
},
{
"_id" : "xtt",
"value" : 5
},
{
"_id" : "yct",
"value" : 2
}
],
"timeMillis" : 21,
"counts" : {
"input" : 14,
"emit" : 14,
"reduce" : 4,
"output" : 4
},
"ok" : 1,
}
>

2. 对于db

可以将MR结果集输出到指定的db中,默认是当前db。这个参数的好处就是可以将MR结果全部存储到一个专用的数据库中。下面是一个小测试:

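# 在Gps数据库中执行MR,指定结果集输出到test库。
# 注意:这里map发出的值是{cnt: 1},而reduce返回的是数字count,二者结构不一致;一旦某个key的reduce被再次调用(对上一次reduce的输出再次reduce),values[i].cnt将是undefined,结果会变成NaN。更稳妥的写法是让reduce返回{cnt: count}(此时结果中的value为{cnt: N}的形式)。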

> use Gps
switched to db Gps
> var map = function() {
... emit(this.name, {cnt: 1});
... };
> var reduce = function(key, values) {
... var count = 0;
... for (var i=0;i<values.length;i++){
... count+=values[i].cnt;
... }
... return count;
... };
> db.tianyc_test3.mapReduce(map, reduce, {out: {replace:"tianyc_test3_result",db:'test'}});
{
"result" : {
"db" : "test",
"collection" : "tianyc_test3_result"
},
"counts" : {
"input" : NumberLong(14),
"emit" : NumberLong(14),
"reduce" : NumberLong(4),
"output" : NumberLong(4)
},
"timeMillis" : 458,
"timing" : {
"shardProcessing" : 12,
"postProcessing" : 445
},
"shardCounts" : {
"seta/10.x.x.1:1111,10.x.x.2:2222" : {
"input" : 14,
"emit" : 14,
"reduce" : 4,
"output" : 4
}
},
"postProcessCounts" : {
"setb/10.x.x.3:3333,10.x.x.4:4444" : {
"input" : NumberLong(4),
"reduce" : NumberLong(0),
"output" : NumberLong(4)
}
},
"ok" : 1,
}
> use test
switched to db test
> show collections
system.indexes
tianyc_test3_result
> db.tianyc_test3_result.find()
{ "_id" : "neu", "value" : 5 }
{ "_id" : "xjbu", "value" : 2 }
{ "_id" : "xtt", "value" : 5 }
{ "_id" : "yct", "value" : 2 }

参考mongodb官网

你可能感兴趣的:(mapreduce)