Python网络数据抓取

import requests  #网页抓取
from bs4 import BeautifulSoup   #内容解析

import re  #正则表达式处理

# Douban's crawling rules: https://www.douban.com/robots.txt
COMMENTS_URL = 'https://book.douban.com/subject/1986590/comments/'
# Alternative listing page for the same book:
# COMMENTS_URL = 'https://book.douban.com/subject/1986590/comments/hot?p=4'

# Seconds to wait for the server before giving up; without a timeout
# requests.get() can block forever on an unresponsive host.
REQUEST_TIMEOUT = 10

# NOTE(review): the original pattern literal was lost when this snippet was
# extracted from a web page (the HTML between the quotes was stripped).
# This is a best-guess reconstruction that captures the numeric star code
# from a rating <span>, e.g. "50" from class="user-stars allstar50 rating".
# Confirm against the live page markup before relying on it.
STAR_PATTERN = re.compile(r'<span class="user-stars allstar(\d+) rating"')

# Parsing demo from the original post. The markup was also damaged by the
# extraction; the tags below are inferred from the soup.b / soup.p lookups.
#   markup = '<p><b>The Little Prince</b></p>'
#   soup = BeautifulSoup(markup, "lxml")
#   print(soup.b)
#   print(soup.p)


def extract_comments(html):
    """Return the text of every ``<p class="comment-content">`` in *html*.

    ``get_text(strip=True)`` is used instead of ``.string`` because
    ``.string`` is ``None`` whenever the paragraph has more than one child
    node (for example text wrapped in a nested tag plus whitespace).
    """
    soup = BeautifulSoup(html, "lxml")
    return [
        item.get_text(strip=True)
        for item in soup.find_all('p', 'comment-content')
    ]


def sum_star_ratings(html):
    """Return the sum of all star codes matched by ``STAR_PATTERN`` in *html*.

    Returns 0 when *html* contains no rating spans.
    """
    return sum(int(star) for star in STAR_PATTERN.findall(html))


def main():
    """Fetch the comments page, print its status, comments and star total."""
    r = requests.get(COMMENTS_URL, timeout=REQUEST_TIMEOUT)
    print(r.status_code)
    # print(r.text)  # uncomment to inspect the raw HTML

    for comment in extract_comments(r.text):
        print(comment)

    print(sum_star_ratings(r.text))


if __name__ == "__main__":
    main()

参考资料

http://www.icourse163.org/learn/NJU-1001571005?tid=1002097008#/learn/content?type=detail&id=1002815204&cid=1003184422&replay=true

你可能感兴趣的:(Programming)