python爬虫笔记

1、requests.get/post(url,headers = headers, params = params/data=data)

#使用requests库 根据网页get或post请求(存在传参的话get用params,post用data),得到一个Response对象

Response.text(打印文本)

Response.encoding(文本编码)补充:decode()解码:decode('gbk')把‘gbk’编码转成unicode编码;encode()编码:把unicode转成其他编码

Response.content(以字节的方式访问请求响应)

Response.status_code(响应状态码)

python爬虫笔记_第1张图片

2、xpath解析

from lxml import etree(导包)

html_page = etree.HTML(Response.content)

data = html_page.xpath('//select[@id="cx_province"]/option/@value')(xpath提取)

3、bs4提取

from bs4 import BeautifulSoup(导包)

soup=BeautifulSoup(Response.content,'lxml')

links = soup.find_all('a',class_='')#查找所有class=''的a标签

for a in links:a.text(a标签下的文本),a.get('href')(a标签的链接)

4銆佹鍒欐彁鍙�

銆傘�傘�傘��

5、json数据提取

json.loads()或者eval()

6、保存session对话访问和代理ip访问

def ip_sql():
    """Load proxy addresses from the MySQL ``proxyippool`` table.

    Every row is turned into a ``<col0>://<col1>:<col2>`` string (presumably
    scheme, host and port -- confirm against the table schema) and appended
    to the module-level ``ip_data`` list, which must already exist; it is
    not created here.

    Returns:
        The ``ip_data`` list, so callers can use the result directly rather
        than relying on the global.
    """
    # NOTE(review): host and credentials are hard-coded; consider moving
    # them to configuration.
    # Open the database connection.
    db = MySQLdb.connect("192.168.201.91", "root", "123456", "spidertools")
    try:
        # Get a cursor to run the query with.
        cursor = db.cursor()
        # Read the whole proxy pool table.
        sql = "SELECT * FROM proxyippool; "
        cursor.execute(sql)
        # Fetch every record.
        results = cursor.fetchall()
        for row in results:
            # NOTE(review): assumes all three columns are strings; a numeric
            # port column would make this concatenation raise -- confirm.
            ip = row[0] + '://' + row[1] + ':' + row[2]
            ip_data.append(ip)
        db.commit()
    finally:
        # Close the connection even if the query or row handling failed.
        db.close()
    return ip_data



def savesql_hotel(data, savepoint_name):
    """Append one hotel record to the ``data`` table of a SQLite database.

    The table is created on first use. ``hoteltype`` is always stored as an
    empty string.

    Args:
        data: Indexable sequence whose items 0-6 are stored, in order, as
            city, name, address, salesTel, lat, lng and Url. Values are
            bound as SQL parameters, so quotes inside them are stored
            verbatim and ``None`` is stored as NULL.
        savepoint_name: Path of the SQLite database file to open or create.

    Raises:
        IndexError: If ``data`` has fewer than seven items.
    """
    import sqlite3

    # Open (or create) the sqlite3 database file.
    con = sqlite3.connect(savepoint_name)
    try:
        con.execute('''CREATE TABLE IF NOT EXISTS data
            (
            hoteltype varchar(200) NOT NULL,
            -- province varchar(200) NOT NULL,
            city varchar(200) NOT NULL,
            name varchar(1000) DEFAULT NULL,
            address varchar(1000) DEFAULT NULL,
            salesTel varchar(100) DEFAULT NULL,
            lat varchar(100) DEFAULT NULL,
            lng varchar(100) DEFAULT NULL,
            Url varchar(1000) DEFAULT NULL);''')

        # Insert the row with bound parameters instead of string
        # formatting, so scraped text containing quotes cannot break or
        # inject into the statement.
        sql = ('insert into data'
               '(hoteltype,city,name,address,salesTel,lat,lng,Url) '
               'values(?,?,?,?,?,?,?,?)')
        row = (u'', data[0], data[1], data[2], data[3],
               data[4], data[5], data[6])
        con.execute(sql, row)
        con.commit()
    finally:
        # Release the file handle even when the insert fails.
        con.close()


# Use one session so cookies persist across requests.
se = requests.session()

# Pick one entry at random from the collected proxy IP pool.
ip = random.choice(ip_data)

# Route both plain and TLS traffic through the chosen proxy.
se.proxies = {'http': ip, 'https': ip}

# Raw response body (bytes) of the target page; the URL is elided in the
# original notes.
jstext = se.get('http://www.sicy......', headers=headers).content

你可能感兴趣的:(python爬虫笔记)