Requests
import requests
from PIL import Image
from io import BytesIO
import json

url = "..........."
print(dir(requests))  # inspect the basic API
r = requests.get(url)
print(r.text)
print(r.status_code)
print(r.encoding)
Passing parameters
params = {'k1': 'v1', 'k2': 'v2', 'k3': [1, 2, 3], 'k4': None}  # a list value is expanded into one key=value pair per item
r = requests.get('http://httpbin.org/get', params=params)
print(r.url)
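With the dict above, the printed URL comes out as http://httpbin.org/get?k1=v1&k2=v2&k3=1&k3=2&k3=3 (parameter order may vary): the list expands into repeated k3 keys, and the None-valued k4 is dropped entirely.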
Binary data
r = requests.get('.........')
image = Image.open(BytesIO(r.content))  # wrap the raw bytes so PIL can read them
image.save('image.jpg')
JSON handling
r = requests.get('https://github.com/timeline.json')
print(type(r.json()))  # json() parses the response body into a dict/list
print(r.json())
print(r.text)
Raw data handling
r = requests.get('.........', stream=True)  # stream so the body is not loaded into memory all at once
with open('image.jpg', 'wb+') as f:
    for chunk in r.iter_content(1024):
        f.write(chunk)
Submitting forms
form = {'username': 'xxx', 'ddddefsc': 'dscdsc'}
r = requests.post('http://httpbin.org/post', data=form)              # sent as form-encoded fields
r = requests.post('http://httpbin.org/post', data=json.dumps(form))  # sent as a raw JSON string instead
print(r.text)

Cookies

url = 'xxxxxxxxxxxx'
r = requests.get(url)
cookies = r.cookies
for k, v in cookies.get_dict().items():  # the standard way to read the returned cookies
    print(k, v)

cookies = {'c1': 'v1'}
r = requests.get('http://httpbin.org/cookies', cookies=cookies)
print(r.text)
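To carry cookies across requests automatically instead of passing them by hand, a Session can be used. A minimal sketch, using httpbin's cookie-setting endpoint:

s = requests.Session()
s.get('http://httpbin.org/cookies/set/sessioncookie/123456')  # the server sets a cookie
r = s.get('http://httpbin.org/cookies')                       # the session sends it back automatically
print(r.text)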
Redirects and redirect history (useful for tracing where a site jumps to)
r = requests.head('http://www.baidu.com', allow_redirects=True)  # head() does not follow redirects unless told to
print(r.url)
print(r.status_code)
print(r.history)  # the chain of intermediate responses, oldest first
Proxies
proxies = {'http': '...', 'https': '...'}  # also handy for tunneling past network restrictions
r = requests.get('http://httpbin.org/cookies', proxies=proxies)
Beautiful Soup
from bs4 import BeautifulSoup

soup = BeautifulSoup(open('test.html'), 'html.parser')
print(soup.prettify())

# Tag
print(soup.title.name)
print(soup.title)

# String
print(type(soup.title.string))
print(soup.title.string)

# Comment
print(type(soup.a.string))
print(soup.a.name)

for item in soup.body.contents:   # direct children only
    print(item.name)

# CSS queries
print(soup.select('.sister'))     # returns a list
print(soup.select('a'))
print(soup.select('#link'))       # look up by id
print(soup.select('head > title'))
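The same queries can also be written with find_all(). A minimal sketch, assuming test.html contains <a class="sister"> links as in the classic three-sisters example:

soup = BeautifulSoup(open('test.html'), 'html.parser')
for a in soup.find_all('a', class_='sister'):  # equivalent to soup.select('.sister')
    print(a.get('href'), a.string)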
HTMLParser
from html.parser import HTMLParser  # Python 3; in Python 2 the module was named HTMLParser

class MyParser(HTMLParser):
    def handle_decl(self, decl):
        HTMLParser.handle_decl(self, decl)
        print('decl %s' % decl)

    def handle_starttag(self, tag, attrs):
        HTMLParser.handle_starttag(self, tag, attrs)
        print('<' + tag + '>')

    def handle_endtag(self, tag):
        HTMLParser.handle_endtag(self, tag)
        print('</' + tag + '>')

    def handle_data(self, data):
        HTMLParser.handle_data(self, data)
        print('data %s' % data)

    def handle_startendtag(self, tag, attrs):
        HTMLParser.handle_startendtag(self, tag, attrs)
        print('<' + tag + '/>')

    def handle_comment(self, data):
        HTMLParser.handle_comment(self, data)
        print('comment %s' % data)

    def close(self):
        HTMLParser.close(self)
        print('Close')

demo = MyParser()
demo.feed(open('hello.html').read())  # read the file first, then feed the string
demo.close()
Avoid processing HTML as strict XML, because real-world HTML is often not well-formed.
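A quick way to see why: a strict XML parser rejects markup that a lenient HTML parser repairs. A minimal sketch:

import xml.etree.ElementTree as ET
from bs4 import BeautifulSoup

broken = '<html><body><p>unclosed paragraph</body></html>'

try:
    ET.fromstring(broken)  # strict XML parsing fails on the unclosed <p>
except ET.ParseError as e:
    print('XML parser failed:', e)

print(BeautifulSoup(broken, 'html.parser').p.text)  # html.parser repairs it and finds the text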
sqlite3
import sqlite3

conn = sqlite3.connect('test.db')
create_sql = 'create table company(id int primary key not null, emp_name text not null);'
conn.execute(create_sql)
insert_sql = 'insert into company values(?, ?)'
conn.execute(insert_sql, (100, 'LY'))
conn.execute(insert_sql, (200, 'July'))
conn.commit()  # persist the inserts
cursors = conn.execute('select id, emp_name from company')
for row in cursors:
    print(row[0], row[1])
conn.close()
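As a side note, a sqlite3 connection also works as a context manager that commits on success and rolls back on an exception. A minimal sketch:

import sqlite3

with sqlite3.connect('test.db') as conn:
    conn.execute('insert into company values(?, ?)', (300, 'Ann'))
# the with-block manages the transaction, not the connection; still call conn.close() separately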
MySQL
To connect you need to specify the MySQL host (ip/port), username, and password; and remember to call conn.commit() after inserting data, or the changes are not persisted.
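A minimal sketch, assuming the third-party pymysql driver, placeholder credentials, and a company table like the sqlite3 example above:

import pymysql

conn = pymysql.connect(host='127.0.0.1', port=3306,
                       user='root', password='secret',
                       database='test', charset='utf8mb4')
try:
    with conn.cursor() as cur:
        cur.execute('insert into company values(%s, %s)', (100, 'LY'))
    conn.commit()  # without commit() the insert is not persisted
    with conn.cursor() as cur:
        cur.execute('select id, emp_name from company')
        for row in cur.fetchall():
            print(row[0], row[1])
finally:
    conn.close()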