[python]简单获取网页文件指定内容方法

阅读更多
#!/usr/bin/python

import urllib,HTMLParser

page_url = 'http://lists.meego.com/pipermail/meego-commits/2011-June/027331.html'

# Fetch the page and keep its raw content as a list, one entry per item
# yielded by the response object (the code further down compares entries
# against strings ending in '\n', i.e. it expects whole lines).
page_src_content = urllib.urlopen(page_url)
try:
    page_list = list(page_src_content)
finally:
    # Release the connection as soon as the content is in memory, even if
    # reading fails part-way through.
    page_src_content.close()

def page_find(LST_NAME, LST_KEYWD, STRT_ELEMT=0):
    """Return the index of the first entry of LST_NAME containing LST_KEYWD.

    LST_NAME   -- sequence of strings to search.
    LST_KEYWD  -- substring to look for inside each entry.
    STRT_ELEMT -- index at which the search starts (default 0).

    Returns the index of the first matching entry at or after STRT_ELEMT,
    or -1 when no entry contains the keyword.
    """
    for i in range(STRT_ELEMT, len(LST_NAME)):
        if LST_KEYWD in LST_NAME[i]:
            return i
    # Only reached when the whole range was scanned without a match.
    return -1

# Find the index of the first line that contains the keyword.
keywd_line = page_find(page_list, 'changes files:\n')

# page_find reports "not found" as -1. Used as a slice start below, -1 would
# silently select the last line of the page, so stop with a clear message.
if keywd_line < 0:
    raise SystemExit("keyword 'changes files:' not found in " + page_url)

# Offset, relative to keywd_line, of the first blank line after the keyword.
# list.index raises ValueError when no blank line follows.
end_line = page_list[keywd_line:].index('\n')

# Print the section that starts at the keyword line.
# NOTE(review): the "- 1" stops one line short of the blank line, so the last
# non-blank line of the section is not printed -- confirm this is intended.
for i in range(keywd_line, keywd_line + end_line - 1):
    print(page_list[i])

你可能感兴趣的:(python,htmlparser,meego,html)