1. 安装 OpenOffice.org 3。
2. 启动服务："C:\Program Files\OpenOffice.org 3.1\program\soffice.exe" -headless -accept="socket,port=8100;urp;"
3. 使用 OpenOffice.org 3 自带的 Python 2.6（带有 pyuno 模块）进行转换："C:\Program Files\OpenOffice.org 3.1\program\python.exe" DocumentConverter.py a.docx a.html
#
# PyODConverter (Python OpenDocument Converter) v1.1 - 2009-11-14
#
# This script converts a document from one office format to another by
# connecting to an OpenOffice.org instance via Python-UNO bridge.
#
# Copyright (C) 2008-2009 Mirko Nasato <[email protected]>
# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl-2.1.html
# - or any later version.
#
DEFAULT_OPENOFFICE_PORT = 8100

# NOTE: "uno" is imported before any "com.sun.star" module, exactly as in
# the original script; keep this order.
import uno
from os.path import abspath, isfile, splitext
from com.sun.star.beans import PropertyValue
from com.sun.star.task import ErrorCodeIOException
from com.sun.star.connection import NoConnectException

FAMILY_TEXT = "Text"
FAMILY_WEB = "Web"
FAMILY_SPREADSHEET = "Spreadsheet"
FAMILY_PRESENTATION = "Presentation"
FAMILY_DRAWING = "Drawing"

#---------------------#
# Configuration Start #
#---------------------#

# see http://wiki.services.openoffice.org/wiki/Framework/Article/Filter

# most formats are auto-detected; only those requiring options are defined here
IMPORT_FILTER_MAP = {
    "txt": {
        "FilterName": "Text (encoded)",
        "FilterOptions": "utf8"
    },
    "csv": {
        "FilterName": "Text - txt - csv (StarCalc)",
        "FilterOptions": "44,34,0"
    }
}

# output extension -> document family -> store properties for that pairing
EXPORT_FILTER_MAP = {
    "pdf": {
        FAMILY_TEXT: { "FilterName": "writer_pdf_Export" },
        FAMILY_WEB: { "FilterName": "writer_web_pdf_Export" },
        FAMILY_SPREADSHEET: { "FilterName": "calc_pdf_Export" },
        FAMILY_PRESENTATION: { "FilterName": "impress_pdf_Export" },
        FAMILY_DRAWING: { "FilterName": "draw_pdf_Export" }
    },
    "html": {
        FAMILY_TEXT: { "FilterName": "HTML (StarWriter)" },
        FAMILY_SPREADSHEET: { "FilterName": "HTML (StarCalc)" },
        FAMILY_PRESENTATION: { "FilterName": "impress_html_Export" }
    },
    "odt": {
        FAMILY_TEXT: { "FilterName": "writer8" },
        FAMILY_WEB: { "FilterName": "writerweb8_writer" }
    },
    "doc": {
        FAMILY_TEXT: { "FilterName": "MS Word 97" }
    },
    "rtf": {
        FAMILY_TEXT: { "FilterName": "Rich Text Format" }
    },
    "txt": {
        FAMILY_TEXT: {
            "FilterName": "Text",
            "FilterOptions": "utf8"
        }
    },
    "ods": {
        FAMILY_SPREADSHEET: { "FilterName": "calc8" }
    },
    "xls": {
        FAMILY_SPREADSHEET: { "FilterName": "MS Excel 97" }
    },
    "csv": {
        FAMILY_SPREADSHEET: {
            "FilterName": "Text - txt - csv (StarCalc)",
            "FilterOptions": "44,34,0"
        }
    },
    "odp": {
        FAMILY_PRESENTATION: { "FilterName": "impress8" }
    },
    "ppt": {
        FAMILY_PRESENTATION: { "FilterName": "MS PowerPoint 97" }
    },
    "swf": {
        FAMILY_DRAWING: { "FilterName": "draw_flash_Export" },
        FAMILY_PRESENTATION: { "FilterName": "impress_flash_Export" }
    }
}

# document family -> page-style properties forced onto every page style
# of the loaded document before it is exported
PAGE_STYLE_OVERRIDE_PROPERTIES = {
    FAMILY_SPREADSHEET: {
        #--- Scale options: uncomment 1 of the 3 ---
        # a) 'Reduce / enlarge printout': 'Scaling factor'
        "PageScale": 100,
        # b) 'Fit print range(s) to width / height': 'Width in pages' and 'Height in pages'
        #"ScaleToPagesX": 1, "ScaleToPagesY": 1000,
        # c) 'Fit print range(s) on number of pages': 'Fit print range(s) on number of pages'
        #"ScaleToPages": 1,
        "PrintGrid": False
    }
}

#-------------------#
# Configuration End #
#-------------------#


class DocumentConversionException(Exception):
    """Raised when a document cannot be converted.

    The human-readable reason is available as ``message`` and via ``str()``.
    """

    def __init__(self, message):
        # Initialise the base class so that ``args``, ``repr()`` and
        # pickling carry the message too.
        Exception.__init__(self, message)
        self.message = message

    def __str__(self):
        return self.message


class DocumentConverter(object):
    """Converts documents through a running OpenOffice.org instance."""

    def __init__(self, port=DEFAULT_OPENOFFICE_PORT):
        """Connect to the OpenOffice.org instance listening on localhost.

        port -- TCP port of the UNO socket acceptor.

        Raises DocumentConversionException if no connection can be made.
        """
        localContext = uno.getComponentContext()
        resolver = localContext.ServiceManager.createInstanceWithContext(
            "com.sun.star.bridge.UnoUrlResolver", localContext)
        try:
            context = resolver.resolve(
                "uno:socket,host=localhost,port=%s;urp;StarOffice.ComponentContext" % port)
        except NoConnectException:
            raise DocumentConversionException(
                "failed to connect to OpenOffice.org on port %s" % port)
        self.desktop = context.ServiceManager.createInstanceWithContext(
            "com.sun.star.frame.Desktop", context)

    def convert(self, inputFile, outputFile):
        """Load inputFile and store it as outputFile.

        The output format is chosen from the extension of outputFile.

        Raises DocumentConversionException if the input cannot be loaded,
        the output format is unknown, or the detected document family
        cannot be exported to that format.
        """
        inputUrl = self._toFileUrl(inputFile)
        outputUrl = self._toFileUrl(outputFile)

        loadProperties = { "Hidden": True }
        inputExt = self._getFileExt(inputFile)
        if inputExt in IMPORT_FILTER_MAP:
            loadProperties.update(IMPORT_FILTER_MAP[inputExt])

        document = self.desktop.loadComponentFromURL(
            inputUrl, "_blank", 0, self._toProperties(loadProperties))
        if document is None:
            # Without this check the failure would only surface later as an
            # opaque AttributeError on None.
            raise DocumentConversionException(
                "failed to load input file: %s" % inputFile)

        # Everything after a successful load runs under try/finally so the
        # hidden document is closed even when the conversion is rejected.
        try:
            try:
                document.refresh()
            except AttributeError:
                # documents without a refresh() method are left as loaded
                pass

            family = self._detectFamily(document)
            self._overridePageStyleProperties(document, family)

            outputExt = self._getFileExt(outputFile)
            storeProperties = self._getStoreProperties(document, outputExt)

            document.storeToURL(outputUrl, self._toProperties(storeProperties))
        finally:
            document.close(True)

    def _overridePageStyleProperties(self, document, family):
        """Apply PAGE_STYLE_OVERRIDE_PROPERTIES[family] to every page style."""
        if family in PAGE_STYLE_OVERRIDE_PROPERTIES:
            properties = PAGE_STYLE_OVERRIDE_PROPERTIES[family]
            pageStyles = document.getStyleFamilies().getByName('PageStyles')
            for styleName in pageStyles.getElementNames():
                pageStyle = pageStyles.getByName(styleName)
                for name, value in properties.items():
                    pageStyle.setPropertyValue(name, value)

    def _getStoreProperties(self, document, outputExt):
        """Return the store-properties dict for document and outputExt.

        Raises DocumentConversionException if outputExt is not a key of
        EXPORT_FILTER_MAP or has no entry for the document's family.
        """
        family = self._detectFamily(document)
        try:
            propertiesByFamily = EXPORT_FILTER_MAP[outputExt]
        except KeyError:
            raise DocumentConversionException(
                "unknown output format: '%s'" % outputExt)
        try:
            return propertiesByFamily[family]
        except KeyError:
            raise DocumentConversionException(
                "unsupported conversion: from '%s' to '%s'" % (family, outputExt))

    def _detectFamily(self, document):
        """Return the FAMILY_* constant matching the services of document.

        Raises DocumentConversionException if no known service matches.
        """
        # WebDocument is tested before GenericTextDocument, so a document
        # supporting both is reported as FAMILY_WEB.
        if document.supportsService("com.sun.star.text.WebDocument"):
            return FAMILY_WEB
        if document.supportsService("com.sun.star.text.GenericTextDocument"):
            # must be TextDocument or GlobalDocument
            return FAMILY_TEXT
        if document.supportsService("com.sun.star.sheet.SpreadsheetDocument"):
            return FAMILY_SPREADSHEET
        if document.supportsService("com.sun.star.presentation.PresentationDocument"):
            return FAMILY_PRESENTATION
        if document.supportsService("com.sun.star.drawing.DrawingDocument"):
            return FAMILY_DRAWING
        raise DocumentConversionException(
            "unknown document family: %s" % document)

    def _getFileExt(self, path):
        """Return the lower-cased extension of path without the leading dot.

        Returns an empty string when path has no extension.
        """
        # splitext() always returns a string, so no None check is needed.
        return splitext(path)[1][1:].lower()

    def _toFileUrl(self, path):
        """Return the file URL for the absolute form of path."""
        return uno.systemPathToFileUrl(abspath(path))

    def _toProperties(self, dict):
        """Return a tuple of PropertyValue objects, one per item of dict."""
        # The parameter name shadows the builtin; it is kept so that
        # existing keyword callers keep working.
        props = []
        for key, value in dict.items():
            prop = PropertyValue()
            prop.Name = key
            prop.Value = value
            props.append(prop)
        return tuple(props)


if __name__ == "__main__":
    from sys import argv, exit

    # Single-argument print(...) calls behave identically on Python 2 and 3.
    if len(argv) < 3:
        print("USAGE: python %s <input-file> <output-file>" % argv[0])
        exit(255)
    if not isfile(argv[1]):
        print("no such input file: %s" % argv[1])
        exit(1)

    try:
        converter = DocumentConverter()
        converter.convert(argv[1], argv[2])
    except DocumentConversionException as exception:
        print("ERROR! " + str(exception))
        exit(1)
    except ErrorCodeIOException as exception:
        print("ERROR! ErrorCodeIOException %d" % exception.ErrCode)
        exit(1)