python用lxml库直接读写office excel文件

python读写excel文件有好几个工具。我用过pywin32。但最近发现用直接操纵xml的方法更快,而且不依赖于平台。excel文件可以保存为xml文件,之后读写就变成了操作xml Element。excel 文件的 格式还有需要了解的地方。下面代码是读写Cell的例子。

def setCell(table,row,col,value):
    row=row-1
    col=col-1
    rows=table.findall("{urn:schemas-microsoft-com:office:spreadsheet}Row")
    row1=rows[row]
    cells=row1.findall("{urn:schemas-microsoft-com:office:spreadsheet}Cell")
    #{urn:schemas-microsoft-com:office:spreadsheet}Index
    at=0
    mycells={}
    for cell in cells:#MergeAcross
        #print(cell.attrib)
        if cell.attrib.get("{urn:schemas-microsoft-com:office:spreadsheet}Index")==None:
            mycells[at]=cell
            at+=1
        else:
            at=int(cell.attrib["{urn:schemas-microsoft-com:office:spreadsheet}Index"])
            mycells[at-1]=cell
        if cell.attrib.get("{urn:schemas-microsoft-com:office:spreadsheet}MergeAcross")!=None:
            at+=int(cell.attrib["{urn:schemas-microsoft-com:office:spreadsheet}MergeAcross"])
    # print(mycells)
    # print(col)
    # input("here")
    data=mycells.get(col).find("{urn:schemas-microsoft-com:office:spreadsheet}Data")
    if data==None:
        data=ET.fromstring('<Data xmlns="urn:schemas-microsoft-com:office:spreadsheet" xmlns:ss="urn:schemas-microsoft-com:office:spreadsheet" xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:x="urn:schemas-microsoft-com:office:excel" xmlns:html="http://www.w3.org/TR/REC-html40" ss:Type="String">CS-2800□</Data>')
        mycells.get(col).append(data)
    data.text=value#"合同号:"+contact.hetongbh
def setCellWithFont(table,row,col,values):
    row=row-1
    col=col-1
    rows=table.findall("{urn:schemas-microsoft-com:office:spreadsheet}Row")
    row1=rows[row]
    cells=row1.findall("{urn:schemas-microsoft-com:office:spreadsheet}Cell")
    #{urn:schemas-microsoft-com:office:spreadsheet}Index
    at=0
    mycells={}
    for cell in cells:#MergeAcross
        #print(cell.attrib)
        if cell.attrib.get("{urn:schemas-microsoft-com:office:spreadsheet}Index")==None:
            mycells[at]=cell
            at+=1
        else:
            at=int(cell.attrib["{urn:schemas-microsoft-com:office:spreadsheet}Index"])
            mycells[at-1]=cell
        if cell.attrib.get("{urn:schemas-microsoft-com:office:spreadsheet}MergeAcross")!=None:
            at+=int(cell.attrib["{urn:schemas-microsoft-com:office:spreadsheet}MergeAcross"])
    data=mycells.get(col).find("{urn:schemas-microsoft-com:office:spreadsheet}Data")
    if data==None:
        data=ET.fromstring('<Data xmlns="urn:schemas-microsoft-com:office:spreadsheet" xmlns:ss="urn:schemas-microsoft-com:office:spreadsheet" xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:x="urn:schemas-microsoft-com:office:excel" xmlns:html="http://www.w3.org/TR/REC-html40" ss:Type="String">CS-2800□</Data>')
        mycells.get(col).append(data)
        data.text="".join(values)#"合同号:"+contact.hetongbh
    else:
        fonts=data.findall("{http://www.w3.org/TR/REC-html40}Font")
        at=0
        for font in fonts:
            if at==len(values):
                break
            print(at,font.text)
            font.text=values[at]
            at+=1
def getCell(table,row,col):
    row=row-1
    col=col-1
    rows=table.findall("{urn:schemas-microsoft-com:office:spreadsheet}Row")
    row1=rows[row]
    cells=row1.findall("{urn:schemas-microsoft-com:office:spreadsheet}Cell")
    #{urn:schemas-microsoft-com:office:spreadsheet}Index
    #print(cells)
    at=0
    mycells={}
    for cell in cells:#MergeAcross
        #print(cell.attrib)
        if cell.attrib.get("{urn:schemas-microsoft-com:office:spreadsheet}Index")==None:
            mycells[at]=cell
            at+=1
        else:
            at=int(cell.attrib["{urn:schemas-microsoft-com:office:spreadsheet}Index"])
            mycells[at-1]=cell
        if cell.attrib.get("{urn:schemas-microsoft-com:office:spreadsheet}MergeAcross")!=None:
            at+=int(cell.attrib["{urn:schemas-microsoft-com:office:spreadsheet}MergeAcross"])
    #print(mycells)
    data=mycells.get(col).find("{urn:schemas-microsoft-com:office:spreadsheet}Data")
    if data==None:
        return ""#None
    else:
        #print(data.attrib)
        if data.attrib.get('{urn:schemas-microsoft-com:office:spreadsheet}Type')=="String":
            return data.text#"合同号:"+contact.hetongbh
        elif data.attrib.get('{urn:schemas-microsoft-com:office:spreadsheet}Type')=="Number":
            return float(data.text)#"合同号:"+contact.hetongbh
        else:
            return data.text



你可能感兴趣的:(xml,python,Excel)