# Extract the content and position of text boxes from a PPT file and write them to the database.
import pymysql
import sys
import re
from pptx import Presentation
from pptx.util import Inches
from pptx.chart.data import ChartData
from pptx.enum.chart import XL_TICK_MARK
from pptx.util import Pt
from pptx.dml.color import RGBColor
from pptx.enum.chart import XL_DATA_LABEL_POSITION
from pptx.enum.chart import XL_LEGEND_POSITION
from pptx.enum.chart import XL_CHART_TYPE
from pptx.enum.chart import XL_MARKER_STYLE
from pptx.enum.chart import XL_TICK_LABEL_POSITION
from pptx import Presentation
from pptx.enum.text import PP_ALIGN
# Matches an integer or decimal literal such as "12", "12." or "12.5".
_NUMBER_PATTERN = re.compile(r"\d+\.?\d*")

# Only texts strictly longer than this many characters get their numbers masked.
_MIN_MASK_LENGTH = 20

# Parameterized INSERT; values are bound by the driver instead of being
# spliced into the SQL text, so quotes/backslashes in slide text or in the
# file path can neither break nor inject into the statement.
_INSERT_SQL = '''INSERT INTO `baoxian`.`auto_ppt_model`
( `fileadr`, `slide_id`, `object_id`, `object_type`, `object_memo`, `stleft`, `sttop`, `stwidth`, `stheight`, `texts`, `cdate`)
values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,now())
'''


def _mask_numbers(text):
    """Return *text* with every numeric literal replaced by ``__``."""
    # A single regex pass replaces each match exactly where it was found.
    # Repeated str.replace() calls could instead hit a span straddling two
    # numbers (e.g. the "1.5" inside "3.1.5") and leave digits behind.
    return _NUMBER_PATTERN.sub('__', text)


def fn_ppt_get_object_text(mydb, filePath):
    """Store the text and geometry of every text-bearing shape of a PPT file.

    Each shape whose ``has_text_frame`` is true yields one row in
    ``baoxian.auto_ppt_model`` holding the file path, the zero-based slide
    index, the zero-based shape index within the slide, the fixed markers
    ``'TEXT'`` / ``'has_text_frame'``, the shape's left/top/width/height and
    its text.  In texts longer than 20 characters every number is replaced
    by the placeholder ``__`` before storing.

    Args:
        mydb: An open DB-API connection whose driver uses ``%s`` parameter
            markers (presumably PyMySQL, which this file imports -- confirm
            against the caller).
        filePath: Path of the ``.pptx`` file, passed to ``Presentation``.

    Returns:
        None.  All rows are committed together once every slide has been
        processed; if an exception is raised, this function commits nothing.
    """
    mycursor = mydb.cursor()
    try:
        # Force UTF-8 for this session so non-ASCII slide text round-trips.
        mycursor.execute('SET CHARACTER SET utf8;')
        mycursor.execute('SET NAMES utf8;')
        mycursor.execute('SET character_set_connection=utf8;')

        prs = Presentation(filePath)
        slides = prs.slides

        # Debug output: slide count, then shape count of the first slide.
        print(len(slides))
        if len(slides) > 0:  # an empty deck has no slides[0]
            print(len(slides[0].shapes))

        for pageNum, slide in enumerate(slides):
            for o, shape in enumerate(slide.shapes):
                print(shape)
                if not shape.has_text_frame:
                    continue

                text = shape.text
                print(text)

                if len(text) > _MIN_MASK_LENGTH:
                    placeholder_text = _mask_numbers(text)
                else:
                    placeholder_text = text
                print('-' * 100)
                print(placeholder_text)

                # Every value is sent as the same string rendering the
                # previous str.format()-built statement embedded, so what
                # gets stored is unchanged for well-behaved input.
                params = tuple(
                    str(value)
                    for value in (
                        filePath,
                        pageNum,
                        o,
                        'TEXT',
                        'has_text_frame',
                        shape.left,
                        shape.top,
                        shape.width,
                        shape.height,
                        placeholder_text,
                    )
                )
                print(_INSERT_SQL, params)
                mycursor.execute(_INSERT_SQL, params)

        mydb.commit()
    finally:
        mycursor.close()