Python 2.7 实现从 JIRA 爬取 CAMEL 项目中所有 BUG 并保存到 CSV 文件

依旧是老师的作业,开源精神指引着我……

以下是Python2.7代码,2018.5.22运行通过


# coding=utf-8
author__ = 'Read Air'
import cookielib
import urllib2
import re
import csv


def saveHtml(file_name, file_content):
    """Dump ``file_content`` into ``<file_name>.html`` using binary mode.

    Every '/' in ``file_name`` is swapped for '_' before the ".html" suffix
    is appended, because '/' cannot appear in a file name (e.g. on Windows).
    ``file_content`` is written as-is, so it must already be a byte string.
    """
    sanitized = file_name.replace('/', '_')
    target_path = sanitized + ".html"
    with open(target_path, "wb") as html_file:
        html_file.write(file_content)


# Browser-like User-Agent sent with every request made through ``opener``.
USER_AGENT = ('Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; '
              'rv:11.0) like Gecko Core/1.63.5221.400 QQBrowser/10.0.1125.400')

# Issue page CAMEL-12525, opened with the JQL filter
# "project = CAMEL AND resolution = Unresolved ORDER BY priority DESC, updated DESC".
START_URL = 'https://issues.apache.org/jira/browse/CAMEL-12525?jql=project%20%3D%20CAMEL%20AND%20resolution%20%3D%20Unresolved%20ORDER%20BY%20priority%20DESC%2C%20updated%20DESC'

# Cookie-aware opener so that cookies set by the server are kept in ``cookie``
# and replayed on later requests made through the same opener.
cookie = cookielib.CookieJar()
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookie))
# Assign the header list instead of appending to it: build_opener() already
# puts ('User-agent', 'Python-urllib/x.y') in addheaders, and urllib2 skips any
# header whose capitalized name is already set on the request, so an appended
# User-Agent entry would never be sent.
opener.addheaders = [('User-Agent', USER_AGENT)]

result_url = opener.open(urllib2.Request(START_URL))
try:
    content = result_url.read()
finally:
    # Release the underlying connection even if reading fails.
    result_url.close()
print("Success in Web1!")
# Keep a local copy of the fetched page as "h.html".
saveHtml("h", content)
re_law1 = '
(.*?)
' key_data = re.findall(re_law1, content, re.S) re_law2 = "\"(.*?)\"[\s\S]+?

(.*?)

" # print re.findall(re_law3, content1, re.S) saveHtml(i[0], content1) out = open("BUG List.csv", "a+") if re.findall(re_law3, content1, re.S): description = re.findall(re_law3, content1, re.S)[0] else: description = "Not Found!" csv_writer = csv.writer(out, dialect="excel") csv_writer.writerow([i[0], description]) # print i[2] # print i[2] out.close()

你可能感兴趣的:(Python,爬虫)