pymongo


>>> import pymongo
启动mongo服务:
$ mongod
使用 MongoClient 建立连接:

>>> from pymongo import MongoClient
>>> client = MongoClient()

>>> client = MongoClient('localhost', 27017)

>>> client = MongoClient('mongodb://localhost:27017/')

取得数据库:
>>> db = client.test_database
>>> db = client['test-database']

取得集合(collection,相当于关系数据库中的表):
>>> collection = db.test_collection
>>> collection = db['test-collection']

插入:

>>> uid = {'id':'5189090045'}
>>> collection.insert_one(uid)

查找:

>>> uid2 = {'id':'123456789'}
>>> collection.insert_one(uid2)
# 表中现在有两条数据
>>> collection.find_one() # 取得第一条数据
{'_id': ObjectId('58b500e37dffdd2be832cbf7'), 'id': '5189090045'}
>>> collection.find_one({'id':'123456789'}) #取得指定数据
{'_id': ObjectId('58b501c17dffdd2be832cbf8'), 'id': '123456789'}

删除:

# Delete the single document whose 'id' field matches the filter.
# delete_one() takes the filter directly, so no prior find_one() is needed,
# and a filter that matches nothing deletes nothing. The legacy
# collection.remove(collection.find_one(...)) form would receive None when no
# document matched and, in PyMongo 3, remove every document in the collection;
# remove() no longer exists in PyMongo 4.
collection.delete_one({'id': '123456789'})

eg:

import requests
from bs4 import BeautifulSoup
import pymongo
from pymongo import MongoClient
from time import time
# Crawl the Douban "Top 250" movie list page by page and store one MongoDB
# document per movie in the DoubanMovie.posts collection, then report the
# elapsed wall-clock time.
start = time()
client = MongoClient('localhost', 27017)
db = client.DoubanMovie
collection = db.Single  # not used below; kept so the name stays defined
posts = db.posts

# The list is paginated 25 entries per page: start = 0, 25, ..., 225.
for page in range(0, 250, 25):
    url = 'https://movie.douban.com/top250?start={}&filter='.format(page)
    # A timeout keeps one stalled connection from hanging the whole crawl.
    r = requests.get(url, timeout=10)
    # Fail loudly on an HTTP error instead of parsing an error page and
    # silently inserting nothing.
    r.raise_for_status()
    soup = BeautifulSoup(r.text, "lxml")
    for info in soup.find_all(class_='info'):
        name = info.find('a').text.strip('\n')
        introduction = info.find('p').text.strip()
        score = info.find(class_='rating_num').text
        # Not every entry has an 'inq' element; store '' when it is absent.
        inq_tag = info.find(class_='inq')
        inq = inq_tag.text if inq_tag is not None else ''
        post = {
            'name': name,
            'introduction': introduction,
            'score': score,
            'inq': inq,
        }
        posts.insert_one(post)
end = time()
print('Cost {} seconds'.format(end - start))
# Cost 4.598546266555786 seconds

你可能感兴趣的:(pymongo)