MongoDB-MongoDB高级查询

文章目录

      • MongoDB高级查询
        • 数据
        • 聚合函数
        • $match 过滤
        • $project
        • $sort ,$limit,$skip
        • $sample
        • 可视化工具
        • selenium + mongodb爬取京东商品

MongoDB高级查询

数据

from pymongo import MongoClient
import datetime

# Seed script: fills db1.emp with the sample employees used by the
# aggregation examples in this article.
# NOTE(review): the "[email protected]" part of this URI looks like it was
# mangled by an e-mail obfuscation filter; substitute the real
# "password@host" before running.
client=MongoClient('mongodb://root:[email protected]:27017')
table=client['db1']['emp']
# Row layout: (name, sex, age, hire date as YYYYMMDD, post, salary,
# plus two trailing numeric columns that the loader below does not store).
l=[
# teaching department
('张飞','male',18,'20170301','teacher',7300.33,401,1),
('张云','male',78,'20150302','teacher',1000000.31,401,1),
('刘备','male',81,'20130305','teacher',8300,401,1),
('关羽','male',73,'20140701','teacher',3500,401,1),
('曹操','male',28,'20121101','teacher',2100,401,1),
('诸葛亮','female',18,'20110211','teacher',9000,401,1),
('周瑜','male',18,'19000301','teacher',30000,401,1),
('司马懿','male',48,'20101111','teacher',10000,401,1),

# sales department
('袁绍','female',48,'20150311','sale',3000.13,402,2),
('张全蛋','female',38,'20101101','sale',2000.35,402,2),
('鹌鹑蛋','female',18,'20110312','sale',1000.37,402,2),
('王尼玛','female',18,'20160513','sale',3000.29,402,2),
('我尼玛','female',28,'20170127','sale',4000.33,402,2),

# operations department
('杨过','male',28,'20160311','operation',10000.13,403,3),
('小龙女','male',18,'19970312','operation',20000,403,3),
('郭靖','female',18,'20130311','operation',19000,403,3),
('黄蓉','male',18,'20150411','operation',18000,403,3),
('梅超风','female',18,'20140512','operation',17000,403,3)
]

for n,item in enumerate(l):
    d={
        "_id":n,
        'name':item[0],
        'sex':item[1],
        'age':item[2],
        'hire_date':datetime.datetime.strptime(item[3],'%Y%m%d'),
        'post':item[4],
        'salary':item[5]
    }
    # Collection.save() was deprecated in PyMongo 3.0 and removed in 4.0.
    # An upsert keyed on _id keeps the same insert-or-overwrite effect, so
    # the script can be re-run without duplicate-key errors.
    table.replace_one({"_id":n},d,upsert=True)

聚合函数

// $group accumulators per post: max, min, average, total and head count.
db.emp.aggregate([
    {"$group":{
        "_id":"$post",
        "最高工资":{"$max":"$salary"},
        "最低工资":{"$min":"$salary"},
        "平均工资":{"$avg":"$salary"},
        "总工资":{"$sum":"$salary"},
        "人数":{"$sum":1}
    }}
])
// The head count is produced with {"$sum":1}: every document in the group
// adds 1 to the total.

// $first / $last take the name from the first and the last document that
// reaches each group.
db.emp.aggregate([
    {"$group":{
        "_id":"$post",
        "第一个":{"$first":"$name"},
        "最后一个":{"$last":"$name"}
    }}
])

// $push and $addToSet both collect one field from every document of a group
// into an array, comparable to MySQL's GROUP_CONCAT.
// $push keeps every value.
db.emp.aggregate([
    {"$group":{
        "_id":"$post",
        "人员名单":{"$push":"$name"}
    }}
])

// $addToSet keeps each distinct value only once.
db.emp.aggregate([
    {"$group":{
        "_id":"$post",
        "人员名单":{"$addToSet":"$name"}
    }}
])

$match 过滤

// $match filters the documents that flow through the pipeline.
db.emp.aggregate([
    {"$match":{"name":"鹌鹑蛋"}}
])

// Comparison operators work inside $match: keep documents whose _id is > 3.
db.emp.aggregate([
    {"$match":{"_id":{"$gt":3}}}
])

// Posts whose highest salary exceeds 10000, with the "teacher" post excluded.
// The $match stages come after $group, so they filter the grouped results.
db.emp.aggregate([
    {"$group":{
        "_id":"$post",
        "max_salary":{"$max":"$salary"}
    }},
    {"$match":{"max_salary":{"$gt":10000}}},
    {"$match":{"_id":{"$ne":"teacher"}}}
])

$project

// $project "projects" each input document onto a new shape: fields can be
// kept, dropped or computed along the way, which controls what the final
// result shows.

// Multiplication: yearly salary.
db.emp.aggregate([
    {"$project":{
        "name":1,
        "_id":0,
        "year_salary":{"$multiply":[12,"$salary"]}
    }}
])

// Addition: age plus 10.
db.emp.aggregate([
    {"$project":{
        "name":1,
        "_id":0,
        "after_10_year":{"$add":[10,"$age"]}
    }}
])

// Subtraction: age minus 10.
db.emp.aggregate([
    {"$project":{
        "name":1,
        "_id":0,
        "befor_10_year":{"$subtract":["$age",10]}
    }}
])

// Date expressions: extract the year of the hire date.
db.emp.aggregate([
    {"$project":{
        "name":1,
        "_id":0,
        "year":{"$year":"$hire_date"}
    }}
])

// Years of service: the current calendar year minus the hire year.
db.emp.aggregate([
    {"$project":{
        "name":1,
        "_id":0,
        "job_year":{"$subtract":[{"$year":new Date()},{"$year":"$hire_date"}]}
    }}
])

// Substring. $substr counts UTF-8 bytes, not characters: the Chinese
// characters in the sample names take 3 bytes each, so [0,3] yields the
// first character of the name.
db.emp.aggregate([
    {"$project":{
        "first_name":{"$substr":["$name",0,3]},
        "_id":0
    }}
])

// Concatenation: name followed by post.
db.emp.aggregate([
    {"$project":{
        "full_info":{"$concat":["$name","$post"]},
        "_id":0
    }}
])

$sort ,$limit,$skip

// $sort: 1 = ascending, -1 = descending.
// Stages run in order: sort by _id, keep the first 10 documents, then drop
// the first of those 10.
db.emp.aggregate([
    {"$sort":{"_id":1}},
    {"$limit":10},
    {"$skip":1}   // number of documents to skip
])

$sample

// Pick 3 documents at random from the collection.
db.emp.aggregate([{"$sample":{"size":3}}])

可视化工具

https://robomongo.org

selenium + mongodb爬取京东商品

==============================spider.py===========================
import time
from urllib.parse import urlencode
from selenium.webdriver import Chrome
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from mongoTest import DBTool

# Start the browser and open the JD search results for the keyword.
driver = Chrome()
kw = "黄金"
par = dict(enc="utf-8", keyword=kw, wq=kw)
# From here on kw holds the URL-encoded query string, not the raw keyword.
kw = urlencode(par)
url = "".join(("https://search.jd.com/Search?", kw))
driver.get(url)

# Set the implicit wait used by element lookups to 10 seconds.
driver.implicitly_wait(10)

# Read the total body height through JavaScript; the scroll calls further
# down use it as their target offset.
height = driver.execute_script("return document.body.clientHeight")
# Lower the implicit wait to 5 seconds for the lookups that follow.
driver.implicitly_wait(5)

def get_datas(expected_count=60, timeout=30, poll_interval=0.5):
    """Return the product elements of the current results page.

    Polls the page until ``expected_count`` products are present or
    ``timeout`` seconds have passed, whichever comes first. On timeout the
    items found so far are returned, so a page that holds fewer products
    than expected does not abort the crawl.

    Args:
        expected_count: Number of ``gl-item`` elements that counts as a
            fully loaded page.
        timeout: Maximum number of seconds to keep polling.
        poll_interval: Pause in seconds between two attempts.

    Returns:
        The list of ``gl-item`` elements found inside the ``gl-warp``
        container on the last attempt.
    """
    deadline = time.monotonic() + timeout
    while True:
        # Look the list up afresh on every attempt so that items added since
        # the previous attempt are counted.
        ul = driver.find_element(By.CLASS_NAME, "gl-warp")
        items = ul.find_elements(By.CLASS_NAME, "gl-item")
        if len(items) == expected_count or time.monotonic() >= deadline:
            return items
        # A loop with a pause replaces unbounded self-recursion, which hit
        # the interpreter's recursion limit on slow or short pages.
        time.sleep(poll_interval)

def paser_data(items):
    """Extract the fields of every product element and store them.

    For each element the link, image URL, price, title, shop name and
    comment text are read via CSS selectors, collected in a dict and handed
    to ``DBTool.insert_data``.

    Args:
        items: Iterable of product elements (the ``gl-item`` nodes).
    """
    print(len(items))
    for item in items:
        link = item.find_element(By.CSS_SELECTOR, ".p-img a").get_attribute("href")

        # Locate the <img> once and read both candidate attributes from it.
        img_tag = item.find_element(By.CSS_SELECTOR, ".p-img a img")
        img = img_tag.get_attribute("src")
        if not img:
            # No src yet: fall back to data-lazy-img, which gets "https:"
            # prepended. Guard against the attribute being absent as well,
            # otherwise the concatenation raises TypeError.
            lazy_img = img_tag.get_attribute("data-lazy-img")
            img = "https:" + lazy_img if lazy_img else None

        price = item.find_element(By.CSS_SELECTOR, ".p-price i").text
        title = item.find_element(By.CSS_SELECTOR, ".p-name a em").text
        shop_name = item.find_element(By.CSS_SELECTOR, ".p-shop a").text
        commit = item.find_element(By.CSS_SELECTOR, ".p-commit").text
        print("=========================================")

        dic = {"link":link,"img":img,"title":title,"shop_name":shop_name,"commit":commit,"price":price}

        DBTool.insert_data(dic)

def get_next():
    """Go to the next results page, then collect and store its products."""
    # "Next page" link, located by its partial link text.
    next_button = driver.find_element(By.PARTIAL_LINK_TEXT, "下一页")
    next_button.click()
    # Scroll down to the page height measured at start-up.
    driver.execute_script("""
    window.scrollTo({
        top: %s,
        behavior: "smooth"
    });""" % height)

    print("11111")
    # Fixed one-second pause after the click and scroll, before the product
    # list is read.
    time.sleep(1)
    print("continue....")
    items = get_datas()
    paser_data(items)


# Crawl the first page, then follow the "next page" link five times.
try:
    # First page: scroll down to the measured page height, then collect and
    # store the products.
    driver.execute_script("""
    window.scrollTo({
        top: %s,
        behavior: "smooth"
    });""" % height)
    items = get_datas()
    paser_data(items)

    for _ in range(5):
        get_next()

    # Keep the browser open for 30 seconds after the crawl has finished.
    time.sleep(30)
finally:
    # quit() ends the whole WebDriver session (browser and driver process),
    # while close() only closes the current window. Running it in finally
    # keeps a failed crawl from leaving the browser behind.
    driver.quit()

==============================DBTool.py============================
"""
连接数据库 保存数据

"""
from pymongo import MongoClient
# Module-level handles; both stay None until connect_server() has run.
c = None
table = None


def connect_server():
    """Open the MongoDB connection and bind the target collection.

    Stores the client in the module global ``c`` and the ``jd_data``
    collection of the ``jd`` database in the module global ``table``, then
    prints the collection object.
    """
    global c, table
    # NOTE(review): the "[email protected]" part of this URI looks like it
    # was mangled by an e-mail obfuscation filter; confirm the real value.
    client = MongoClient("mongodb://root:[email protected]:27017")
    collection = client["jd"]["jd_data"]
    c = client
    table = collection
    print(table)

def insert_data(data):
    """Insert one document, connecting to the server on first use.

    Args:
        data: The document (a dict) to store.
    """
    # Compare against None explicitly: PyMongo Collection objects do not
    # support truth-value testing and raise NotImplementedError on
    # ``not table``.
    if table is None:
        connect_server()
    # Collection.insert() was deprecated in PyMongo 3.0 and removed in 4.0;
    # insert_one() is its single-document replacement.
    table.insert_one(data)

def close():
    """Close the MongoDB client if one was opened; otherwise do nothing."""
    global table, c
    # Nothing to close when no connection was ever made; calling
    # ``c.close()`` on None would raise AttributeError.
    if c is None:
        return
    c.close()
    # Drop both handles so a later insert_data() reconnects instead of
    # reusing the closed client.
    table = None
    c = None

你可能感兴趣的:(MongoDB)