欢迎使用CSDN-markdown编辑器

代码块

代码块语法遵循标准markdown代码,例如:

@requires_authorization

#setup.py build
#setup.py install
from bs4 import BeautifulSoup
bs = BeautifulSoup

# NOTE(review): these strings look like an HTML sample whose tags were
# stripped when the post was extracted; only the surviving text is kept.
doc = [
    'Page title',
    '\n\nThis is paragraph one.',
    '\n\nThis is paragraph two.',
    '',
]
soup = bs(''.join(doc))

#---
import datetime
import re
import urllib


def getHtml(url):
    """Download ``url`` through the fixed HTTP proxy and return the body.

    Relies on the Python 2 ``urllib.urlopen`` signature, which accepts a
    ``proxies`` mapping.
    """
    page = urllib.urlopen(url, proxies={'http': 'http://192.168.1.2:3128'})
    try:
        return page.read()
    finally:
        # Release the connection even when reading the body fails.
        page.close()


def getImg(html):
    """Return every ``.jpg`` URL captured by the ``src="..." pic_ext`` pattern."""
    imgre = re.compile(r'src="(.+?\.jpg)" pic_ext')
    return imgre.findall(html)


def imgDownload(imglist):
    """Save each URL in ``imglist`` as ``0.jpg``, ``1.jpg``, ... in the cwd.

    After each download a progress line with the 1-based count is printed.
    """
    for index, imgurl in enumerate(imglist):
        urllib.urlretrieve(imgurl, '%s.jpg' % index)
        # Same text the Python 2 statement ``print a, b, c`` would emit.
        print('第 %s 张图片下载完成' % (index + 1))


html = getHtml('http://tieba.baidu.com/p/2460150866')
print(getImg(html))

#---


def getItemNum(url):
    """Fetch a seller's item-list page and extract the item count text.

    Purpose: get the total number of items a seller has.
    Input:   the URL of any item-list page of that seller.
    Output:  the list of regex matches found in the page; the caller below
             takes the first match and strips the thousands separators.
    """
    raw = getHtml(url)
    # NOTE(review): this pattern matches almost anything; it presumably had
    # surrounding HTML markup that was lost in extraction -- confirm.
    p = re.compile(r'(.*)?')
    return p.findall(raw)


# NOTE(review): ``url`` is not defined anywhere in the visible source; it
# must be assigned before this timing snippet can run.
starttime = datetime.datetime.now()
tmp = getItemNum(url)
itemNum = int(tmp[0].replace(',', ''))
endtime = datetime.datetime.now()
print((endtime - starttime).seconds)

#===
html = getHtml(url)
soup = bs(html)
soup.find(id='descItemNumber').string

#----
# ``with`` closes the file once the lines have been read.
with open('fds.txt', 'r') as fid:
    lines = fid.readlines()  # each entry keeps its trailing newline


def getAddress(itemID):
    """Look up the address block on the eBay page of ``itemID``.

    Returns a soup built from the first ``div.iti-eu-bld-gry`` element, or
    ``None`` (after printing the error) when anything goes wrong.
    """
    try:
        item_url = 'http://www.ebay.com/itm/' + itemID
        item_soup = bs(getHtml(item_url))
        matches = item_soup.select('div.iti-eu-bld-gry ')
        return bs(''.join(matches[0]))
    except Exception as ex:
        # Best-effort lookup: report the failure and let the caller continue.
        print(ex)
        print('没有找到地址')
        return None

你可能感兴趣的:(Python)