python编写mongodb中的map/reduce


目的:求下面user_info表中班级2中的各个学科的分数总和和平均分。
具体的内容如下:
{"uid":"a123","type":"man","class":2,"score":{"math":80,"english":60,"chinese":90}}
{"uid":"b123","type":"female","class":2,"score":{"math":100,"english":90,"chinese":80}}
{"uid":"c123","type":"man","class":2,"score":{"math":60,"english":50,"chinese":88}}
{"uid":"d123","type":"female","class":2,"score":{"math":79,"english":87,"chinese":78}}
{"uid":"e123","type":"female","class":1,"score":{"math":79,"english":87,"chinese":78}}

1,使用遍历读取的方式

使用find()遍历class为2的值,进行统计计算,具体的python代码如下:

#!/usr/bin/env python
# -*- encoding:utf-8 -*-
from pymongo import Connection
from bson.code import Code
from bson.son import SON
mongoconn = Connection('192.168.0.203',27017)
db = mongoconn['things']["user_info"]
db.drop()
db.insert({"uid":"a123","type":"man","class":2,"score":{"math":80,"english":60,"chinese":90}})
db.insert({"uid":"b123","type":"female","class":2,"score":{"math":100,"english":90,"chinese":80}})
db.insert({"uid":"c123","type":"man","class":2,"score":{"math":60,"english":50,"chinese":88}})
db.insert({"uid":"d123","type":"female","class":2,"score":{"math":79,"english":87,"chinese":78}})
db.insert({"uid":"e123","type":"female","class":1,"score":{"math":79,"english":87,"chinese":78}})
print time.time()
score = {"math":0,
         "english":0,
         "chinese":0,
         }
a = db.find({"class":2})
b = a.count()
print b
for i in a:
    score["math"] += i["score"]["math"]
    score["english"] += i["score"]["english"]
    score["chinese"] += i["score"]["chinese"]
               
math_avg = float(score["math"])/b
english_avg = float(score["english"])/b
chinese_avg = float(score["chinese"])/b
print "-------------------------------------------------"
print score
print "--------------------------------------------------"
print "math average score is :",math_avg
print "english average score is :",english_avg
print "chinese avarege score is :",chinese_avg


2,使用mongodb自带的map/reduce来统计
a,map/reduce简介
map/reduce是一个并行的分布式计算模型,用于大规模数据的计算。具体的工作过程主要分为map和reduce两个阶段,每一个阶段都以key-value(即键值对)的形式作为输入和输出。
具体的key-value的格式多种多样,由具体的程序来定义。 map阶段读入数据,生成key-value。 reduce阶段读入由map函数生成的key-value进行计算,返回结果。

mongodb中的map/reduce过程:
读入collection
执行map函数,用emit函数生成key-value
执行reduce函数,遍历map的输出,进行统计
返回结果collection

b,python的具体实现
#!/usr/bin/env python
# -*- encoding:utf-8 -*-
from pymongo import Connection
from bson.code import Code
from bson.son import SON
mongoconn = Connection('192.168.0.203',27017)
db = mongoconn['things']["user_info"]
db.drop()
db.insert({"uid":"a123","type":"man","class":2,"score":{"math":80,"english":60,"chinese":90}})
db.insert({"uid":"b123","type":"female","class":2,"score":{"math":100,"english":90,"chinese":80}})
db.insert({"uid":"c123","type":"man","class":2,"score":{"math":60,"english":50,"chinese":88}})
db.insert({"uid":"d123","type":"female","class":2,"score":{"math":79,"english":87,"chinese":78}})
db.insert({"uid":"e123","type":"female","class":1,"score":{"math":79,"english":87,"chinese":78}})
map = Code("function() {"
          "   emit(this.class,this.score);"
          "}"
          )
          
reduce = Code("function(key,values){"
              "    var result={math:0,english:0,chinese:0};"
              "   for (var i = 0; i < values.length; i++) {"  
              "       result.math += values[i].math;"
              "       result.english += values[i].english;"
              "       result.chinese += values[i].chinese;"
              "   return result;"
              "}"        
             )
results = db.map_reduce(map,reduce,"class_user")
a = db.find({"class":2})
b = a.count()
for i in results.find({"_id":2}):
    print "*********************************************************"
    print i
    print "math average score is :",i["value"]["math"]/b
    print "english average score is :",i["value"]["english"]/b
    print "chinese avarege score is :",i["value"]["chinese"]/b