>>> import webbrowser
>>> webbrowser.open('http://www.baidu.com')
http://news.baidu.com/ns?word=搜索内容
https://www.baidu.com/s?wd='+address
百度搜索
if len(sys.argv) > 1:
    # Get address from command line
    address = ' '.join(sys.argv[1:])
else:
    # Get address from clipboard
    address = pyperclip.paste()
webbrowser.open('https://www.baidu.com/s?wd='+address)
requests
是第三方文件
import requests
快速上手 — Requests 2.18.1 文档
>>> import requests
>>> res = requests.get('http://blog.csdn.net/mq_go')
>>> type(res)
<class 'requests.models.Response'>
>>> res.status_code == requests.codes.ok
True
>>> len(res.text)
40492
>>> print(res.text[:500])
<html>
<head>
<meta http-equiv="content-type" content="text/html; charset=utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=Edge">
<meta name="viewport" content="width=device-width, initial-scale=1.0, minimum-scale=1.0, maximum-scale=1.0, user-scalable=no">
<meta name="apple-mobile-web-app-status-bar-style" content="black">
<link rel="canonical" href="http://blog.csdn.net/mq_go"/>
<script type="text/javascript">
console.log('version: phoeni
res.raise_for_status()
res = requests.get('http://blog.csdn.net/mq_go')
res.raise_for_status()
res = requests.get('http://aosdfhaufa.com/')
>>> import requests
>>> re = requests.get('http://www.baidu.com')
>>> re.raise_for_status()
>>> playFile = open('baidu_html.txt','wb')
>>> for chunk in re.iter_content(100000):
playFile.write(chunk)
2381
>>> playFile.close()
使用下载的网页创建 BeautifulSoup 对象
>>> import bs4,requests
>>> re = requests.get('http://blog.csdn.net/Mq_Go')
>>> re.status_code
200
>>> re.status_code == requests.codes.ok
True
>>> re.raise_for_status()
>>> shup = bs4.BeautifulSoup(re.text)
>>> type(shup)
<class 'bs4.BeautifulSoup'>
使用本地的 html 文件
>>> exampleFile = open('example.html')
>>> example = exampleFile.read()
>>> exampleShup = bs4.BeautifulSoup(example)
>>> type(exampleShup)
<class 'bs4.BeautifulSoup'>
传递给select()方法的选择器 | 将匹配… |
---|---|
soup.select('div') | 所有名为 &lt;div&gt; 的元素 |
soup.select('#author') | 带有id 属性为author的元素 |
soup.select('.notice') | 所有使用CSS class 属性名为notice 的元素 |
soup.select('div span') | 所有在 &lt;div&gt; 元素之内的 &lt;span&gt; 元素 |
soup.select('div > span') | 所有直接在 &lt;div&gt; 元素之内的 &lt;span&gt; 元素,中间没有其他元素 |
soup.select('input[name]') | 所有名为 &lt;input&gt;,并有一个name 属性,其值无所谓的元素 |
soup.select('input[type="button"]') | 所有名为 &lt;input&gt;,并有一个type 属性,其值为button 的元素 |
>>> import bs4
>>> exampleFile = open('example.html')
>>> exampleShup = bs4.BeautifulSoup(exampleFile.read())
>>> elems = exampleShup.select('#author')
>>> type(elems)
<class 'list'>
>>> len(elems)
1
>>> type(elems[0])
<class 'bs4.element.Tag'>
>>> elems[0].getText()
'AI Sweigart'
>>> elems[0]
"author">AI Sweigart
>>> elems[0].attrs
{'id': 'author'}
>>> pElems = exampleShup.select('p')
>>> len(pElems)
3
>>> print(str(pElems[0])+'\n'+str(pElems[1])+'\n'+str(pElems[2]))
&lt;p&gt;Download my &lt;strong&gt;Python&lt;/strong&gt; book from &lt;a href="http://inventwithpython.com"&gt;My website&lt;/a&gt;.&lt;/p&gt;
&lt;p class="slogan"&gt;Learn Python the easy way!&lt;/p&gt;
&lt;p&gt;By&lt;span id="author"&gt;AI Sweigart&lt;/span&gt;&lt;/p&gt;
>>> print(str(pElems[0].getText())+'\n'+str(pElems[1].getText())+'\n'+str(pElems[2].getText()))
Download my Python book from My website.
Learn Python the easy way!
ByAI Sweigart
>>> import bs4
>>> soup = bs4.BeautifulSoup(open('example.html'))
>>> soanElem = soup.select('span')[0]
>>> soanElem.get('id')
'author'
>>> str(soanElem)
'&lt;span id="author"&gt;AI Sweigart&lt;/span&gt;'
>>> soanElem.attrs
{'id': 'author'}
#! python3
# lucky.py - Opens the top few search results in browser tabs.
#
#
# Author : qmeng
# MailTo : [email protected]
# QQ     : 1163306125
# Blog   : http://blog.csdn.net/Mq_Go/
# Create : 2018-02-08
# Version: 1.0
import requests, sys, bs4, webbrowser

# Build the search URL once from the command-line arguments.
# NOTE: the script queries Sogou, not Baidu (the original "Baidu..."
# message was misleading).
searchUrl = 'https://www.sogou.com/web?query=' + ' '.join(sys.argv[1:])
print('Searching...')
print(searchUrl)

# `res`, not `re`, to avoid shadowing the stdlib regex module name.
res = requests.get(searchUrl)
res.raise_for_status()  # abort with an exception on HTTP failure

# Retrieve the top search result links.
soup = bs4.BeautifulSoup(res.text, "html.parser")
linkElems = soup.select('.vrTitle a')  # Sogou result title anchors

# Open a browser tab for each of (at most) the first 3 results.
numOpen = min(3, len(linkElems))
if numOpen == 0:
    print('No search results found.')
for i in range(numOpen):
    print('已经为您打开' + linkElems[i].getText())
    print()
    webbrowser.open(linkElems[i].get('href'))
#! python3
# downloadXKcd.py - Downloads XKCD comics (newest first, at most 5 pages).
#
#
# Author : qmeng
# MailTo : [email protected]
# QQ     : 1163306125
# Blog   : http://blog.csdn.net/Mq_Go/
# Create : 2018-02-08
# Version: 1.0
import requests, os, bs4

url = 'http://xkcd.com'  # start at the newest comic
os.makedirs('xkcd', exist_ok=True)  # images are saved under ./xkcd
pagesDone = 0
# The "Prev" link on the very first comic points to '#', which ends the
# walk; the pagesDone cap keeps this demo to 5 pages.
while not url.endswith('#') and pagesDone != 5:
    pagesDone = pagesDone + 1
    # Download the page.
    print('Downloading page %s...' % (url))
    res = requests.get(url)
    res.raise_for_status()
    soup = bs4.BeautifulSoup(res.text, "html.parser")

    # Find the URL of the comic image.
    comicElem = soup.select('#comic img')
    if comicElem == []:
        print('Could not find comic image.')
    else:
        comicUrl = comicElem[0].get('src')
        # The src attribute is protocol-relative ('//imgs.xkcd.com/...'),
        # so prepend a scheme.  The original
        # 'http://xkcd.com' + comicUrl[1:] produced a broken URL like
        # 'http://xkcd.com/imgs.xkcd.com/...'.
        if comicUrl.startswith('//'):
            comicUrl = 'http:' + comicUrl
        print('Downloading image %s...' % (comicUrl))
        res = requests.get(comicUrl)
        res.raise_for_status()
        # Stream the image to disk in chunks; `with` guarantees the file
        # handle is closed even if a write fails.
        imgPath = os.path.join('xkcd', os.path.basename(comicUrl))
        with open(imgPath, 'wb') as imgFile:
            for chunk in res.iter_content(100000):
                imgFile.write(chunk)

    # Follow the "Prev" button to the previous comic.
    prevLink = soup.select('a[rel="prev"]')[0]
    url = 'http://xkcd.com' + prevLink.get('href')