python-pptx随笔记day2-从ppt中获取文本框内容及位置信息,并写入到数据库中

从ppt中获取文本框内容及位置信息,并写入到数据库中

import pymysql
import sys
import re
from pptx import Presentation
from pptx.util import Inches
from pptx.chart.data import ChartData
from pptx.enum.chart import XL_TICK_MARK
from pptx.util import Pt
from pptx.dml.color import RGBColor
from pptx.enum.chart import XL_DATA_LABEL_POSITION
from pptx.enum.chart import XL_LEGEND_POSITION
from pptx.enum.chart import XL_CHART_TYPE
from pptx.enum.chart import XL_MARKER_STYLE
from pptx.enum.chart import XL_TICK_LABEL_POSITION
from pptx import Presentation
from pptx.enum.text import PP_ALIGN


# Matches an integer or decimal number, e.g. "12", "3.14" or "7.".
_NUMBER_PATTERN = re.compile(r"\d+\.?\d*")

# Parameterized INSERT: the driver escapes every value, so slide text or file
# paths containing quotes/backslashes can neither break nor inject into the SQL.
_INSERT_SQL = '''INSERT INTO `baoxian`.`auto_ppt_model`
            (`fileadr`, `slide_id`, `object_id`, `object_type`, `object_memo`,
             `stleft`, `sttop`, `stwidth`, `stheight`, `texts`, `cdate`)
            VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, now())'''


def _mask_numbers(text, min_length=20):
    """Return *text* with every number replaced by ``__``.

    Text of ``min_length`` characters or fewer is returned unchanged, so
    short labels keep their digits.
    """
    if len(text) <= min_length:
        return text
    # A single regex pass replaces each number exactly where it was matched.
    # The previous "sort by length, then str.replace" approach could match
    # across token boundaries (e.g. "2.3 1.2.3" -> "__ 1.__") and leave
    # digits behind.
    return _NUMBER_PATTERN.sub('__', text)


def _to_int_or_none(value):
    """Convert a shape geometry value to a plain ``int``; keep ``None`` as is."""
    return None if value is None else int(value)


def fn_ppt_get_object_text(mydb, filePath):
    """Store the text and geometry of every text shape of a .pptx file.

    For each shape that has a text frame, one row is inserted into
    ``baoxian.auto_ppt_model`` holding the file path, the zero-based slide
    index, the zero-based shape index within the slide, the shape geometry
    (left, top, width, height) and the shape text. In texts longer than 20
    characters all numbers are replaced by the ``__`` placeholder.

    All inserts are committed together at the end. If an error occurs, the
    function does not commit; the cursor is closed in either case.

    Args:
        mydb: An open DB-API connection (e.g. one returned by
            ``pymysql.connect``) whose driver uses ``%s`` query parameters.
        filePath: Path of the .pptx file to read; also stored in ``fileadr``.

    Returns:
        None.
    """
    mycursor = mydb.cursor()
    try:
        # Make sure the session talks utf8 so non-ASCII slide text is stored
        # correctly.
        mycursor.execute('SET CHARACTER SET utf8;')
        mycursor.execute('SET NAMES utf8;')
        mycursor.execute('SET character_set_connection=utf8;')

        prs = Presentation(filePath)  # load the presentation

        # Progress output: slide count and shape count of the first slide.
        slide_count = len(prs.slides)
        print(slide_count)
        if slide_count:  # an empty deck has no slide 0 to inspect
            print(len(prs.slides[0].shapes))

        for pageNum, slide in enumerate(prs.slides):
            for o, shape in enumerate(slide.shapes):  # o = shape index on the slide
                print(shape)
                if not shape.has_text_frame:
                    continue

                text = shape.text
                print(text)
                placeholder_text = _mask_numbers(text)
                print('-' * 100)
                print(placeholder_text)

                # NOTE(review): python-pptx may report None for a geometry
                # value (e.g. a placeholder inheriting its position); it is
                # stored as NULL instead of the literal string 'None'.
                params = (
                    filePath,
                    pageNum,
                    o,
                    'TEXT',
                    'has_text_frame',
                    _to_int_or_none(shape.left),
                    _to_int_or_none(shape.top),
                    _to_int_or_none(shape.width),
                    _to_int_or_none(shape.height),
                    placeholder_text,
                )
                print(_INSERT_SQL, params)
                mycursor.execute(_INSERT_SQL, params)

        mydb.commit()
    finally:
        mycursor.close()

 

你可能感兴趣的:(python-pptx)