Python英语单词查询

    要考英语了, 单词一大堆, 索性就用Python自动到网上找单词的中文意思了.
目前只是盲目地把结果摘下来而已.
    写的过程中,终于知道编码问题是多么的严重了. 下次一定要用chardet这个库了,方便快捷...

# http://dict.youdao.com/search?q=hello&tab=chn&keyfrom=dict.result cannot be
# used here because of a bug in Python.
import urllib
from BeautifulSoup import BeautifulSoup
import sys
global    file

def getWebContent(url, word):
    """Fetch the dictionary page at *url* and record the explanation of *word*.

    The page is downloaded, converted from GB2312 to UTF-8 and parsed; the
    text nodes inside ``<div class="explain">`` are joined into one string.
    The formatted entry (``word``, a colon, then the explanation) is
    written UTF-8 encoded to the module-level ``file`` object and echoed to
    the console encoded as GBK.

    url  -- address of the dictionary result page to download.
    word -- the word being looked up; used as the heading of the entry.
            A byte string is interpreted as UTF-8.
    """
    response = urllib.urlopen(url)
    try:
        html = response.read()
    finally:
        # Release the connection even when reading fails.
        response.close()
    # Undecodable bytes are dropped ("ignore") rather than aborting the lookup.
    html = unicode(html, "gb2312", "ignore").encode("utf-8", "ignore")
    soup = BeautifulSoup(html)

    # Filter 1: isolate the element that holds the explanation.
    explain = soup.find("div", {"class": "explain"})
    if explain is None:
        # Without this guard str(None) would put the literal text "None"
        # into the output as if it were the translation.
        outtext = u""
    else:
        # Filter 2: re-parse the fragment and keep only its text nodes.
        # BeautifulSoup generators:
        # http://www.crummy.com/software/BeautifulSoup/documentation.zh.html#Generators
        fragment = BeautifulSoup(str(explain))
        outtext = u"".join([
            element
            for element in fragment.recursiveChildGenerator()
            if isinstance(element, unicode)
        ])

    # Rendering: start a new line before every digit 1-9 (the numbered
    # senses) and turn runs of four spaces into line breaks.
    for digit in "123456789":
        outtext = outtext.replace(digit, "\n%s" % digit)
    outtext = outtext.replace("    ", "\n")

    # Convert explicitly instead of relying on the process-wide default
    # encoding for implicit str/unicode mixing.
    if not isinstance(word, unicode):
        word = word.decode("utf-8")
    outtext = word + u":\n" + outtext + u"\n"

    file.write(outtext.encode("utf-8"))
    # "replace" keeps one character that GBK cannot represent from
    # aborting the whole run; it only affects the console echo.
    print(outtext.encode("gbk", "replace"))


def word_FromFile():
    """Look up every word listed in the input word file.

    Reads ``F:/Whu/EnghlishWords.txt`` line by line (one word per line,
    stored as UTF-8), builds the Baidu dictionary query URL for each word
    and hands both to ``getWebContent``.  Blank lines are skipped.
    """
    import codecs  # local import: only needed here for the BOM constant

    # A distinct local name avoids shadowing the ``file`` name that
    # getWebContent() uses for the output handle; ``with`` closes the input
    # even if a lookup raises.
    with open("F:/Whu/EnghlishWords.txt", "r") as word_file:
        for line in word_file:
            # Be careful: Notepad stores UTF-8 text with a 3-byte BOM at the
            # very start of the file.  Drop it so that it does not become
            # part of the first word.
            if line.startswith(codecs.BOM_UTF8):
                line = line[len(codecs.BOM_UTF8):]

            # Remove the trailing newline so it ends up neither in the URL
            # nor in the heading written to the output.
            word = line.strip()
            if not word:
                continue

            print(word.decode("utf-8"))

            # Percent-encode the word so spaces or non-ASCII bytes cannot
            # produce a malformed request.
            url = "http://dict.baidu.com/s?wd=%s" % urllib.quote(word)
            getWebContent(url, word)
if __name__ == '__main__':
    # Python 2 only: make UTF-8 the process-wide default codec.  Implicit
    # str/unicode conversions elsewhere in this script may depend on it.
    reload(sys)
    sys.setdefaultencoding('utf-8')

    # getWebContent() writes its results through this module-level name.
    file = open("F:/Whu/EnghlishWords_translate.txt", 'w')
    try:
        word_FromFile()
        file.flush()
    finally:
        # Close the output even when a lookup fails part-way through, so
        # the entries written so far are not lost.
        file.close()
    

你可能感兴趣的:(python,职场,休闲)