用 Python 脚本调用 Google Dictionary

阅读更多
网上已有通过 Python 调用 Google Translate 的脚本，不过返回的结果不够详细，于是又写了一个抓取 Google Dictionary 词典释义的脚本。
脚本需要 lxml 支持；用 XPath 解析文档比原始的 SGMLParser 要舒服很多。

import urllib2
import urllib
from StringIO import StringIO
from lxml import etree

def grabData(queryWord):
    """Fetch the Google Dictionary (en -> zh-CN) result page for a word.

    Args:
        queryWord: The word or phrase to look up. It is percent-encoded
            before being placed in the query string, so spaces, ``&``,
            ``#`` and non-ASCII characters cannot corrupt the URL.

    Returns:
        The raw response body read from the HTTP response.
    """
    # urllib.quote() (Python 2) raises KeyError on non-ASCII unicode input,
    # so convert unicode to UTF-8 bytes before quoting.
    if isinstance(queryWord, unicode):
        queryWord = queryWord.encode("utf-8")

    # Build the URL directly instead of substituting a placeholder word, so
    # the query can never collide with other parts of the URL template.
    requestUrl = (
        "http://www.google.cn/dictionary?langpair=en|zh-CN&q="
        + urllib.quote(queryWord, safe="")
        + "&hl=zh-CN&aq=f"
    )

    response = urllib2.urlopen(urllib2.Request(requestUrl))
    try:
        return response.read()
    finally:
        # Release the underlying socket even if read() fails.
        response.close()

def parseData(htmlStr):
    """Extract headword and definition text from a dictionary result page.

    The page is parsed with lxml's HTML parser. For every ``li`` under
    ``div.dct-srch-rslt > ul.dfnt`` the text of its ``h4`` child is emitted
    (``None`` if there is no ``h4``), followed by the text of each
    ``ol/li`` sub-entry.

    For a sub-entry that contains a ``ul`` child, the text of its ``span``
    child is emitted first (empty string if the ``span`` is missing) and
    the remaining text is collected from the ``ul``; otherwise the text is
    collected from the sub-entry itself.

    Args:
        htmlStr: HTML source of the result page.

    Returns:
        A flat list of strings (and possibly ``None`` for a missing ``h4``)
        with newlines removed, in document order.
    """
    result = []
    parser = etree.HTMLParser()
    tree = etree.parse(StringIO(htmlStr), parser)
    entries = tree.xpath("//div[@class='dct-srch-rslt']/ul[@class='dfnt']/li")
    for item in entries:
        result.append(item.findtext("h4"))
        for subitem in item.xpath("ol/li"):
            detailItems = subitem.find("ul")  # multi item
            # An lxml element is falsy when it has no children, so compare
            # against None explicitly to test whether the <ul> was found.
            if detailItems is not None:
                # findtext() returns None when there is no <span>.
                label = subitem.findtext("span") or ""
                result.append(label.replace("\n", ""))
            else:
                detailItems = subitem

            # Only each node's leading .text is collected (tail text is not
            # included). The text is joined as-is rather than passed through
            # str(), which would fail on non-ASCII unicode in Python 2.
            parts = [node.text for node in detailItems.iter() if node.text]
            result.append("".join(parts).replace("\n", ""))
    return result

if __name__ == "__main__":
    # Demo run: look up a sample word and print every extracted line.
    rawHtml = grabData("love")
    for entry in parseData(rawHtml):
        print(entry)


你可能感兴趣的:(Google,脚本,Python,F#)