爬取58同城二手笔记本价格数据

import pickle

import requests
from bs4 import BeautifulSoup

# Save a page's HTML to a local file (testing aid).
def html_save(url):
    """Download *url* and pickle ``[url, html]`` into a user-named file.

    The output file name is read interactively from standard input.

    Args:
        url: Address of the page to download.

    Raises:
        requests.RequestException: If the download fails or times out.
        OSError: If the output file cannot be opened for writing.
    """
    filename = input('请输入要保存文件名称')

    # Fetch before opening the file so that a failed request does not leave
    # an empty file behind. The timeout keeps a stalled server from blocking
    # forever (requests has no default timeout).
    html = requests.get(url, timeout=10).text
    html_list = [url, html]

    # The context manager closes the file even if pickling raises.
    with open(filename, 'wb') as f:
        pickle.dump(html_list, f)

# Read a locally saved HTML file back in (testing aid).
def html_load(filename):
    """Return the object pickled in *filename*.

    For files written by ``html_save`` this is the list ``[url, html]``.

    Args:
        filename: Path of the pickle file to read.

    Returns:
        The unpickled object.

    Raises:
        OSError: If the file cannot be opened.
        pickle.UnpicklingError: If the file is not a valid pickle.
    """
    # SECURITY: unpickling can execute arbitrary code. Only load files that
    # this script wrote itself, never files from an untrusted source.
    with open(filename, 'rb') as f:
        return pickle.load(f)

def _plain_text(tag):
    """Return ``tag.string`` as a plain ``str``, or ``None`` if it has none."""
    text = tag.string
    # A NavigableString keeps a reference to the whole parse tree; converting
    # to str keeps the collected results small and safe to pickle.
    return None if text is None else str(text)


# Parse one listing page.
def html_jx(html, all_list):
    """Append the ``[name, price]`` pairs found in *html* to *all_list*.

    Names come from the ``a.t`` elements and prices from the ``b.pri``
    elements; the two result lists are paired by position.
    NOTE(review): this assumes the n-th ``a.t`` and the n-th ``b.pri`` belong
    to the same listing - confirm against the page markup.

    Args:
        html: HTML text of a listing page.
        all_list: List to extend; it is modified in place.

    Returns:
        The same *all_list* object, with one ``[name, price]`` entry added
        per matched pair. An entry value is ``None`` when the element has no
        single string child.
    """
    soup = BeautifulSoup(html, 'lxml')
    name_list = soup.select('a.t')
    price_list = soup.select('b.pri')

    # zip stops at the shorter list, so a page with unequal numbers of names
    # and prices no longer raises IndexError.
    for name_tag, price_tag in zip(name_list, price_list):
        all_list.append([_plain_text(name_tag), _plain_text(price_tag)])

    return all_list

# Find the link to the next page.
def find_next(html):
    """Return the absolute URL of the next listing page.

    The ``href`` of the first ``a.next`` element is appended to the site
    root ``http://zz.58.com``.

    Args:
        html: HTML text of the current listing page.

    Returns:
        The next page's URL as a string.

    Raises:
        IndexError: If the page contains no ``a.next`` element.
        TypeError: If that element has no ``href`` attribute.
    """
    soup = BeautifulSoup(html, 'lxml')
    next_link = 'http://zz.58.com' + soup.select('a.next')[0].get('href')
    return next_link

# Crawl the listing pages, following each page's "next" link, and collect
# every [name, price] pair into all_list.
START_URL = 'http://zz.58.com/bijiben/0/?PGTID=0d100000-0015-624b-2e87-3e5214b563a9&ClickID=1'
MAX_PAGES = 12
REQUEST_TIMEOUT = 10  # seconds; requests waits indefinitely without one

all_list = []

html = requests.get(START_URL, timeout=REQUEST_TIMEOUT).text

for each in range(MAX_PAGES):
    all_list = html_jx(html, all_list)

    # The last page needs no follow-up request.
    if each == MAX_PAGES - 1:
        break

    try:
        url_ne = find_next(html)
    except IndexError:
        # find_next indexes the first 'a.next' match, so IndexError means
        # this page has no next link: stop here and keep what was collected.
        break

    html = requests.get(url_ne, timeout=REQUEST_TIMEOUT).text

# Save the collected list to a local file.
with open('lifile', 'wb') as f:
    pickle.dump(all_list, f)

你可能感兴趣的:(爬取58同城二手笔记本价格数据)