这一节主要介绍 requests、BeautifulSoup、HTMLParser、数据库编程、登录问题和豆瓣数据爬取。
import json
import requests
from PIL import Image
from io import BytesIO
# List the names exposed by the requests package.
print('dir(requests):', dir(requests))

# Fetch the Baidu home page. requests applies no timeout unless one is given,
# so pass one explicitly to keep the script from hanging on an unresponsive
# server (requests raises requests.exceptions.Timeout when it is exceeded).
url = 'http://www.baidu.com'
r = requests.get(url, timeout=10)

# Show the decoded body, the HTTP status code, and the encoding requests
# selected for decoding r.text.
print('r.text:', r.text)
print('r.status_code:', r.status_code)
print('r.encoding:', r.encoding)
dir(requests): ['ConnectTimeout', 'ConnectionError', 'DependencyWarning', 'FileModeWarning', 'HTTPError', 'NullHandler', 'PreparedRequest', 'ReadTimeout', 'Request', 'RequestException', 'Response', 'Session', 'Timeout', 'TooManyRedirects', 'URLRequired', '__author__', '__build__', '__builtins__', '__cached__', '__copyright__', '__doc__', '__file__', '__license__', '__loader__', '__name__', '__package__', '__path__', '__spec__', '__title__', '__version__', 'adapters', 'api', 'auth', 'certs', 'codes', 'compat', 'cookies', 'delete', 'exceptions', 'get', 'head', 'hooks', 'logging', 'models', 'options', 'packages', 'patch', 'post', 'put', 'request', 'session', 'sessions', 'status_codes', 'structures', 'utils', 'warnings']
r.text: