# Note: some of the processing below is my own addition and can be ignored.
#coding:utf-8
import os
import win32com
import win32com.client as win32
from Ltp import ltp_db
from win32com.client import Dispatch, constants
# Collect the paths of all Word files under the given (relative) path
def listdir(path, list_name):
    """Recursively collect the paths of all files below ``path``.

    Every directory entry is joined onto ``path``; sub-directories are
    descended into and every other entry is appended to ``list_name``.
    No filtering by extension is done, so non-Word files are collected too.

    Args:
        path: Directory to scan.
        list_name: List that receives one joined path per file found.
            It is extended in place.

    Returns:
        The same ``list_name`` object, so the call can also be used as an
        expression. Existing callers that ignore the result are unaffected.
    """
    for entry in os.listdir(path):
        entry_path = os.path.join(path, entry)
        if os.path.isdir(entry_path):
            # Recurse with the same accumulator so results stay in one list.
            listdir(entry_path, list_name)
        else:
            list_name.append(entry_path)
    return list_name
def get_column_string(column):
    """Return ``str(column)`` with CR, LF and BEL characters removed.

    Args:
        column: Any object; it is converted with ``str()`` first. The caller
            in this file passes Word table cells (presumably the BEL
            character is Word's end-of-cell marker -- TODO confirm).

    Returns:
        The string form of ``column`` without carriage returns, line feeds
        or BEL (code point 7) characters.
    """
    # One translate() pass replaces three chained replace() calls. Code
    # point 7 is the BEL character the original spelled as the octal '\07'.
    strip_table = {ord('\r'): None, 0x07: None, ord('\n'): None}
    return str(column).translate(strip_table)
# Parse a Word document
def PraseWord(docName, client, conn):
    """Parse one Word document: scan its paragraphs, then walk its tables.

    Paragraphs whose style is "普通(网站)" and whose text starts with a digit
    are collected and printed. For every table, the first two cells of each
    row are read and cleaned with ``get_column_string``; what happens to
    them afterwards is still a TODO.

    Args:
        docName: Path of the document to open. It is made absolute before
            being handed to Word.
        client: Word ``Application`` COM object used to open the document.
        conn: Database connection; a cursor is created from it for the
            pending content-processing step.

    Returns:
        None.
    """
    para_names = []
    # Not used yet; reserved for the TODO content-processing step below.
    cursor = conn.cursor()

    # The original opened a hard-coded test file and ignored docName. Its
    # note flags that Word needs an absolute path, hence abspath().
    doc = client.Documents.Open(os.path.abspath(docName))
    try:
        for paragraph in doc.Paragraphs:
            para_range = paragraph.Range
            style = para_range.Style
            print(style, type(style))
            # Style presumably comes back as a COM object rather than a
            # plain string (verify), so normalise it before comparing.
            if str(style) != "普通(网站)":
                continue
            text = para_range.Text
            # The digit test belongs on the paragraph text: the style name
            # matched above can never start with a digit.
            if text and text[0].isdigit():
                para_names.append(text)
                print(text)

        # Word collections accessed by call are 1-based.
        for table_index in range(1, doc.Tables.Count + 1):
            table = doc.Tables(table_index)
            # Separate loop variable: the original reused `i` for both loops.
            for row in range(1, table.Rows.Count + 1):
                column_name = get_column_string(table.Cell(row, 1))
                content = get_column_string(table.Cell(row, 2))
                # TODO: process column_name / content
    finally:
        # Close without saving so documents do not pile up in Word while a
        # whole directory is being processed.
        doc.Close(False)
# Main entry point
def insert_Attibute(conn):
    """Start Word and parse every file found under the attribute folder.

    Args:
        conn: Database connection forwarded unchanged to ``PraseWord``.

    Returns:
        None.
    """
    cl = win32com.client.Dispatch('Word.Application')
    cl.Visible = 1
    # NOTE(review): the Word instance is left running afterwards, as before.
    # Dispatch may attach to an instance the user already has open, so it
    # is not quit here -- confirm the desired lifetime.

    list_name = []
    # Project path. Raw string: the value is unchanged, but '\s' and '\属'
    # are no longer parsed as (invalid) escape sequences.
    listdir(r"..\sources\属性", list_name)

    # Parse each collected file; the original always passed list_name[0],
    # so only the first document was ever processed (repeatedly).
    for doc_path in list_name:
        PraseWord(doc_path, cl, conn)