MongoDB 的基本使用以及 pymongo 的用法

MongoDB 的安装

MongoDB 是由 C++ 语言编写的非关系型数据库,是一个基于分布式文件存储的开源数据库系统,其内容存储形式类似 JSON 对象,它的字段值可以包含其他文档、数组及文档数组,非常灵活。

MongoDB 支持多种平台,包括 Windows、Linux、Mac OS、Solaris 等,在其官方网站 https://www.mongodb.com/download-center 均可找到对应的安装包。

  • 官方网站:https://www.mongodb.com
  • 官方文档:https://docs.mongodb.com
  • GitHub:https://github.com/mongodb
  • 中文教程:http://www.runoob.com/mongodb/mongodb-tutorial.html

MongoDB 的使用

> show dbs
admin   0.000GB
config  0.000GB
local   0.000GB
> db.runoob.insert({"name":"菜鸟教程"})

>db.site2.find()
{ "_id" : 1, "name" : "RUNOOB", "cn_name" : "菜鸟教程" }
{ "_id" : 2, "name" : "Google", "address" : "Google 搜索" }
{ "_id" : 3, "name" : "Facebook", "address" : "脸书" }
{ "_id" : 4, "name" : "Taobao", "address" : "淘宝" }
{ "_id" : 5, "name" : "Zhihu", "address" : "知乎" }
> show collections
TutorialItem
> db.TutorialItem.find()

pymongo的用法

创建表

#!/usr/bin/env python
# coding=utf-8
"""Select a MongoDB database and collections with pymongo and insert sample documents."""
import pymongo

# Connect to the MongoDB server running on the local machine.
myclient = pymongo.MongoClient('mongodb://localhost:27017/')

# Optional: list the existing database names to check whether one is present.
# dblist = myclient.list_database_names()
# print(dblist)
# if "rundb" in dblist:
#     print("database already exists!")

# Select the database and the collection to work with, then show which
# collections the database currently contains.
mydb = myclient['rundb']
mycol = mydb["sites"]
collist = mydb.list_collection_names()
print(collist)

# Insert a single document.
mydict = {"name": "RUNOOB", "alexa": "10000", "url": "https://www.runoob.com"}

x = mycol.insert_one(mydict)
print(x)
# Show the _id of the new document instead of printing the result object twice.
print(x.inserted_id)

# Insert several documents at once.
mylist = [
    {"name": "Taobao", "alexa": "100", "url": "https://www.taobao.com"},
    {"name": "QQ", "alexa": "101", "url": "https://www.qq.com"},
    {"name": "Facebook", "alexa": "10", "url": "https://www.facebook.com"},
    {"name": "知乎", "alexa": "103", "url": "https://www.zhihu.com"},
    {"name": "Github", "alexa": "109", "url": "https://www.github.com"}
]

x = mycol.insert_many(mylist)

# Print the _id values of all inserted documents.
print(x.inserted_ids)

# Switch to a second collection and insert documents with explicit _id values.
# NOTE(review): the _id values are fixed, so running this script a second time
# against the same collection is expected to fail with a duplicate-key error
# -- confirm before re-running.
mycol = mydb["site2"]

mylist = [
    {"_id": 1, "name": "RUNOOB", "cn_name": "菜鸟教程"},
    {"_id": 2, "name": "Google", "address": "Google 搜索"},
    {"_id": 3, "name": "Facebook", "address": "脸书"},
    {"_id": 4, "name": "Taobao", "address": "淘宝"},
    {"_id": 5, "name": "Zhihu", "address": "知乎"}
]

x = mycol.insert_many(mylist)

# Print the _id values of all inserted documents.
print(x.inserted_ids)

查询表

#!/usr/bin/env python
# coding=utf-8
"""Query the "sites" collection of the "rundb" database with pymongo."""
import pymongo

myclient = pymongo.MongoClient("mongodb://localhost:27017/")
mydb = myclient["rundb"]
mycol = mydb["sites"]

# Print every document in the collection.
for x in mycol.find():
    print(x)

# Print only the "name" and "alexa" fields; "_id" is excluded explicitly.
for x in mycol.find({}, {"_id": 0, "name": 1, "alexa": 1}):
    print(x)


# Conditional query: documents whose name is exactly "RUNOOB".
myquery = {"name": "RUNOOB"}

mydoc = mycol.find(myquery)

for x in mydoc:
    print(x)

# Advanced query: documents whose name matches the regular expression "^R".
myquery = {"name": {"$regex": "^R"}}

mydoc = mycol.find(myquery)

for x in mydoc:
    print(x)

# Limit the result to at most three documents.
myresult = mycol.find().limit(3)

# Print the limited result.
for x in myresult:
    print(x)

scrapy爬虫存储到mongo数据库

class MongoPipeline(object):
    """Scrapy item pipeline that writes each item into a MongoDB collection.

    The connection URI and database name are read from the crawler settings
    ``MONGO_URI`` and ``MONGO_DB``. Each item is stored in a collection named
    after the item's class.
    """

    def __init__(self, mongo_uri, mongo_db):
        """Remember the connection URI and database name for later use.

        Args:
            mongo_uri: URI passed to ``pymongo.MongoClient``.
            mongo_db: Name of the database the items are written to.
        """
        self.mongo_uri = mongo_uri
        self.mongo_db = mongo_db

    @classmethod
    def from_crawler(cls, crawler):
        """Build the pipeline from the ``MONGO_URI`` / ``MONGO_DB`` settings of *crawler*."""
        return cls(
            mongo_uri=crawler.settings.get('MONGO_URI'),
            mongo_db=crawler.settings.get('MONGO_DB')
        )

    def open_spider(self, spider):
        """Open the MongoDB client and select the configured database."""
        self.client = pymongo.MongoClient(self.mongo_uri)
        self.db = self.client[self.mongo_db]

    def process_item(self, item, spider):
        """Insert *item* into the collection named after its class and return it.

        Args:
            item: The scraped item; it is converted with ``dict(item)`` before
                being stored.
            spider: The spider that produced the item (unused).

        Returns:
            The same *item*, unchanged.
        """
        # Use the item's class name as the collection name.
        name = item.__class__.__name__
        # insert_one() replaces Collection.insert(), which was deprecated in
        # PyMongo 3.0 and removed in PyMongo 4.0.
        self.db[name].insert_one(dict(item))
        return item

    def close_spider(self, spider):
        """Close the MongoDB client opened in ``open_spider``."""
        self.client.close()

你可能感兴趣的:(MongoDB 的基本使用以及 pymongo 的用法)