依旧是老师的作业,开源精神指引着我……
以下是Python2.7代码,2018.5.22运行通过
# coding=utf-8
author__ = 'Read Air'
import cookielib
import urllib2
import re
import csv
def saveHtml(file_name, file_content):
    """Write ``file_content`` to ``<file_name>.html`` in binary mode.

    Every ``/`` in ``file_name`` is replaced with ``_`` before the ``.html``
    suffix is appended, since ``/`` is not allowed in Windows file names.
    Because the replacement is applied to the whole string, any directory
    part of ``file_name`` is flattened into the file name as well.

    Args:
        file_name: Base name of the output file, without the extension.
        file_content: Page body to store. Byte strings are written
            unchanged; text (unicode) is encoded as UTF-8 first.
    """
    # The file is opened in binary mode, so text must be encoded explicitly.
    # On Python 2 ``bytes`` is ``str``, so only ``unicode`` objects are
    # converted; objects without ``encode`` (e.g. bytearray) pass through.
    if hasattr(file_content, "encode") and not isinstance(file_content, bytes):
        file_content = file_content.encode("utf-8")
    with open(file_name.replace('/', '_') + ".html", "wb") as f:
        f.write(file_content)
# Fetch the JIRA issue page through a cookie-aware opener and keep a local
# copy of the HTML as "h.html".
cookie = cookielib.CookieJar()
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookie))
# Assign the header list instead of appending to it: build_opener() already
# seeds addheaders with a "User-agent: Python-urllib/x.y" entry, and urllib2
# skips any later header whose capitalised name is already set on the
# request, so an appended User-Agent would never be sent.
opener.addheaders = [
    ('User-Agent',
     'Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; '
     'rv:11.0) like Gecko Core/1.63.5221.400 QQBrowser/10.0.1125.400'),
]
result_url = opener.open(
    urllib2.Request(
        'https://issues.apache.org/jira/browse/CAMEL-12525?jql=project%20%3D%20CAMEL%20AND%20resolution%20%3D%20Unresolved%20ORDER%20BY%20priority%20DESC%2C%20updated%20DESC'))
try:
    content = result_url.read()
finally:
    # Release the connection even if reading the body fails.
    result_url.close()
# Parenthesised form prints the same text under the Python 2 print statement.
print("Success in Web1!")
saveHtml("h", content)
# NOTE(review): the rest of this script is damaged and cannot run as shown.
# The string literal opened for re_law1 is never closed, and the names `i`,
# `content1` and `re_law3` used below are not defined anywhere in the visible
# text. Presumably the HTML markup inside the regex literals and the
# statements between them (a loop over the re_law1 matches and a second page
# fetch) were lost when the code was published -- TODO restore them from the
# original script.
# What the surviving fragment shows: content1 is saved as an HTML file named
# after i[0]; "BUG List.csv" is opened in append mode; and a row
# [i[0], description] is written, where description is the first re_law3
# match in content1 (searched with re.S) or "Not Found!" when there is none.
re_law1 = '(.*?)
(.*?)
" # print re.findall(re_law3, content1, re.S) saveHtml(i[0], content1) out = open("BUG List.csv", "a+") if re.findall(re_law3, content1, re.S): description = re.findall(re_law3, content1, re.S)[0] else: description = "Not Found!" csv_writer = csv.writer(out, dialect="excel") csv_writer.writerow([i[0], description]) # print i[2] # print i[2] out.close()