一、SQL和MongoDB的术语概念区分
SQL术语/概念 |
MongoDB术语/概念 |
database(数据库) | database(数据库) |
table(表) |
collection(集合) |
row(行) |
document or bson document(文档/bson文档) |
column(列) |
field(字段) |
index(索引) |
index(索引) |
table join(表连接) |
embedded documents and linking(内嵌文档) |
primary key(指定任意唯一列或者复合列作为主键) |
primary key _id列会被自动设置为主键 |
aggregation(聚合,例如group by) | aggregation pipeline(聚合管道) |
二、create和alter
CREATE TABLE users ( id MEDIUMINT NOT NULL AUTO_INCREMENT, user_id Varchar(30), age Number, status char(1), PRIMARY KEY (id)) |
第一次执行 insert() 操作时隐式创建。如果文档中不指定 _id 列,那么会自动添加 _id 列并默认为主键。 db.users.insert( { user_id: "abc123", age: 55, status: "A" } ) 当然,你也可以显式地创建一个集合: db.createCollection("users") |
ALTER TABLE users ADD join_date DATETIME |
集合中并不强制规定文档的结构;也就是说,集合级别并没有文档结构转换的语句。 然而,在文档级别,可以使用 db.collection.update() 方法中的 $set 操作符添加新的字段到现有的文档中。 db.users.update( { }, { $set: { join_date: new Date() } }, { multi: true } ) |
ALTER TABLE users DROP COLUMN join_date |
db.users.update( { }, { $unset: { join_date: "" } }, { multi: true } ) |
CREATE INDEX idx_user_id_asc ON users(user_id) |
db.users.createIndex( { user_id: 1 } ) |
CREATE INDEX idx_user_id_asc_age_desc ON users(user_id, age DESC) |
db.users.createIndex( { user_id: 1, age: -1 } ) |
DROP TABLE users |
db.users.drop() |
三、CRUD基本操作
1.insert
SQL | MongoDB |
insert into users(user_id,age,status) values("aaa123",25,"A"); | db.users.insert({user_id: "aaa123", age:25, status:"A"}) |
2.select
SQL |
MongoDB |
select * from users | db.users.find({}) |
select id,user_id,status from users | db.users.find({}, { user_id: 1, status: 1 }); |
select user_id,status from users | db.users.find({}, { _id: 0, user_id: 1, status: 1}) |
select * from users where status = "A" | db.users.find({ status: "A"}) |
select user_id,status from users where status = "A" |
db.users.find({ status: "A"}, {_id: 0, user_id: 1, status: 1}) |
select * from users where status != "A" |
db.users.find({ status: { $ne: "A" } }) |
select * from users where status = "A" and age >=25 |
db.users.find({ status: "A", age: {$gte: 25} } ) |
select * from users where status = "B" or age > 20 | db.users.find({ $or:[ {status: "B"},{age: {$gt:20 } } ] } ) |
select * from users where age > 30 and age < 50 |
db.users.find({age: { $gt: 30, $lt: 50} } ) |
select * from users where user_id like '%bc%' |
db.users.find({user_id: /bc/}) |
select * from users where user_id like 'bc%' | db.users.find({user_id: /^bc/}) |
select * from users where status = "A" order by age desc |
db.users.find({ status: "A"}).sort({age: -1}) |
select count(*) from users | db.users.count() or db.users.find({}).count() |
select count(user_id) from users | db.users.count({ user_id: { $exists: true} } ) or db.users.find({user_id: { $exists: true} }).count() |
select distinct(status) from users |
db.users.distinct("status") |
select * from users limit 1 | db.users.findOne() or db.users.find({}).limit(1) |
select * from users limit 5 skip 10 |
db.users.find().limit(5).skip(10) |
3.update
SQL | MongoDB |
update users set status = "C" where age > 25 | db.users.update({ age: { $gt: 25 } }, { $set: { status: "C" } },{multi: true}) |
update users set age = age + 3 where status = "C" |
db.users.update({ status: "C" }, { $inc: { age: 3 } }, {multi: true}) |
4.remove
SQL | MongoDB |
delete from users where age = 40 | db.users.remove({ age: 40 }) |
四、聚合操作
示例文档结构:
eg1:zipcodes:
{
"_id": "10280",
"city": "NEW YORK",
"state": "NY",
"pop": 5574,
"loc": [
-74.016323,
40.710537
]
}
_id 字段保存邮政编码,使用字符串格式。
city 字段保存城市名称。一个城市对应多个邮编:不同的区使用不同的邮政编码。
state 字段保存州名称的简称(使用了两个字母)。
pop 字段保存了人口总数。
loc 字段使用经纬度对保存位置信息。
eg2:operationData:
{ "_id" : ObjectId("56d279e7e4b07b38df7c2278"), "activityType" : "MICRO_TICKET", "createTime" : ISODate("2016-02-28T04:39:03.365Z"), "accountNo" : "014206", "optionType" : "MICROTICKET_VISIT", "openId" : "oAKBQt52AFC73laYvSI8QJyV3TA8", "fans" : 0, "fee" : 0, "commodityNum" : 0, "count" : 1}
activityType字段代表不同的活动类型,比如砍价活动、预售卡活动、抽奖活动
accountNo字段表示商家编号
optionType字段表示用户的操作类型,比如访问、分享
openId字段表示微信用户的唯一标识
SQL | MongoDB | MongoDB执行结果 |
SELECT state, SUM(pop) AS totalPop FROM zipcodes GROUP BY state HAVING totalPop >= (10*1000*1000) |
db.zipcodes.aggregate([ { $group: { _id: "$state", totalPop: { $sum: "$pop" } } }, { $match: { totalPop: { $gte: 10 * 1000 * 1000 } } } ]) |
{ "_id" : "CA", "totalPop" : 29754890 } |
select activityType,count(*) from operationData group by activityType |
db.operationData.aggregate( [ { $group: { _id: "$activityType", total: { $sum: 1 } } } ]) |
{ "_id" : "SECKILL", "total" : 28803 } |
select accountNo as accountNo,count(*) as pv from operationData where activityType = 'MICRO_TICKET' and createTime >= '2016-03-01 00:00:00' and createTime <= '2016-03-29 23:59:59' group by accountNo |
db.operationData.aggregate( [ { $match: { activityType: "MICRO_TICKET", createTime: { $gte: new Date("2016-03-01 00:00:00"), $lte: new Date("2016-03-29 23:59:59") } } }, { $group: { _id: "$accountNo", totalPop: { $sum: 1 } } }, /* 使用project重命名字段 */ { $project: { _id: 0, accountNo: "$_id", pv: "$totalPop" } } ]) |
{ "accountNo" : "00000", "pv" : 8 } |
select optionType,count(*) as totalPop from operationData where activityType = 'BARGAIN' group by optionType having totalPop > 50 order by totalPop desc limit 3 |
db.operationData.aggregate([ { $match: { activityType: "BARGAIN" } }, { $group: { _id: "$optionType", totalPop: { $sum: 1 } } }, { $sort: { totalPop: -1 } }, { $match: { totalPop: { $gt: 50 } } }, { $limit: 3 } ]) |
{ "_id" : "BARGAIN_VIEW", "totalPop" : 235 } |
select accountNo as accountNo,count(distinct(openId)) as uv from operationData where activityType = 'MICRO_TICKET' group by accountNo |
/* 实现思路:管道group两次,第一次以accountNo与openId为key分组, 再从这个结果中以accountNo为key再分组 */ db.operationData.aggregate([ { $match: { activityType: "MICRO_TICKET"} }, { $group: { _id: { accountNo: "$accountNo", openId: "$openId" }, totalCount: { $sum: 1 } } }, { $group: { _id: "$_id.accountNo", uv: { $sum: 1 } } }, /* 使用project重命名字段 */ { $project: { _id: 0, accountNo: "$_id", uv: "$uv" } } ]) |
{ "uv" : 1, "accountNo" : "111" } |
五、Map-Reduce操作
/**
 * SQL equivalent:
 *   SELECT state, SUM(pop) AS totalPop FROM zipcodes GROUP BY state
 */

/**
 * Aggregation-pipeline version: a single $group stage that groups the
 * zipcodes documents by `state` and sums each group's `pop` into `totalPop`.
 */
db.getCollection("zipcodes").aggregate([
  {
    $group: {
      _id: "$state",
      totalPop: { $sum: "$pop" }
    }
  }
]);
/**
 * Map-reduce version of the same per-state population total.
 */

// Form 1: the map and reduce functions are passed inline.
db.zipcodes.mapReduce(
  // map: emits the document's pop keyed by its state (`this` is the document being processed)
  function () {
    emit(this.state, this.pop);
  },
  // reduce: returns Array.sum of the values emitted for one key
  (key, values) => Array.sum(values),
  // options: `out` names the output collection
  { out: "map_reduce_example" }
);
// Form 2: the map and reduce functions are defined as named variables first.

/**
 * Map step: emits the current document's `pop` value keyed by its `state`.
 * Both fields are read from `this`, the document being processed.
 */
var mapFunction = function () {
  var doc = this;
  emit(doc.state, doc.pop);
};
/**
 * Reduce step: totals the values emitted for a single key.
 *
 * @param state - the key the values were emitted under (not used in the body)
 * @param pop - array of values emitted for that key
 * @returns whatever Array.sum(pop) returns
 *
 * NOTE(review): Array.sum is not standard JavaScript; this presumably relies
 * on the helper the MongoDB JavaScript environment provides — confirm.
 */
var reduceFunction = function (state, pop) {
  var totalPop = Array.sum(pop);
  return totalPop;
};
// Run the map-reduce on zipcodes with the named functions; `out` names the
// output collection "map_reduce_example".
db.getCollection("zipcodes").mapReduce(mapFunction, reduceFunction, {
  out: "map_reduce_example"
});
/**
 * Example 1: compute the total amount per customer.
 */

/**
 * Map step: emits the current document's `price` keyed by its `cust_id`.
 * Both fields are read from `this`, the document being processed.
 */
var mapFunction2 = function () {
  var order = this;
  emit(order.cust_id, order.price);
};
/**
 * Reduce step: totals the prices emitted for a single customer id.
 *
 * @param custId - the key the values were emitted under (not used in the body)
 * @param price - array of values emitted for that key
 * @returns whatever Array.sum(price) returns
 *
 * NOTE(review): Array.sum is not standard JavaScript; this presumably relies
 * on the helper the MongoDB JavaScript environment provides — confirm.
 */
var reduceFunction2 = function (custId, price) {
  var totalPrice = Array.sum(price);
  return totalPrice;
};
// Run the per-customer map-reduce on orders; `out` names the output
// collection "order_example".
db.getCollection("orders").mapReduce(mapFunction2, reduceFunction2, {
  out: "order_example"
});
/**
 * Example 2: compute the order count and the average quantity ordered
 * for each SKU.
 */

// Map step, applied to each input document; inside the function `this` is
// the document currently being processed by the map-reduce operation.
// For every entry of the document's `items` array it emits the entry's `sku`
// as the key and an object { count: 1, qty: <entry's qty> } as the value.
var mapFunction3 = function () {
  this.items.forEach(function (item) {
    emit(item.sku, { count: 1, qty: item.qty });
  });
};
// Reduce step, taking two arguments: keySKU and countObjVals.
// countObjVals is an array of the { count, qty } objects that the map step
// emitted for keySKU.
// The function folds that array into one object whose `count` is the sum of
// the elements' `count` values and whose `qty` is the sum of their `qty`
// values. An empty array yields { count: 0, qty: 0 }.
var reduceFunction3 = function (keySKU, countObjVals) {
  var totals = { count: 0, qty: 0 };
  countObjVals.forEach(function (entry) {
    totals.count += entry.count;
    totals.qty += entry.qty;
  });
  return totals;
};
// Finalize step, taking two arguments: key and reducedVal.
// It adds an `avg` field (qty divided by count) to reducedVal and returns
// that same, now modified, object.
var finalizeFunction3 = function (key, reducedVal) {
  var totalQty = reducedVal.qty;
  var orderCount = reducedVal.count;
  reducedVal.avg = totalQty / orderCount;
  return reducedVal;
};
// Run the map-reduce on the orders collection with mapFunction3,
// reduceFunction3 and finalizeFunction3.
//   out      - names the output collection "map_reduce_example22"
//   query    - selects only documents whose ord_date is on or after ($gte)
//              2016-03-10 00:00:00
//   finalize - finalizeFunction3
// NOTE(review): `out` is given as a plain collection name here, which
// presumably replaces an existing collection rather than merging into it —
// confirm against the mapReduce `out` documentation if merging is intended.
// NOTE(review): the date string is not ISO-8601, so how it is parsed
// (including the time zone) may depend on the JavaScript engine — confirm.
db.getCollection("orders").mapReduce(mapFunction3, reduceFunction3, {
  out: "map_reduce_example22",
  query: {
    ord_date: { $gte: new Date('2016-03-10 00:00:00') }
  },
  finalize: finalizeFunction3
});