中文报错ValueError: All strings must be XML compatible: Unicode or ASCII, no NULL bytes or control characters

最近在研究用 Python 生成 Word 文档,使用的是 python-docx 模块,发现写入的内容含有中文时会报如下错误:

ValueError: All strings must be XML compatible: Unicode or ASCII, no NULL bytes or control characters

在字符串前加上 u 前缀,将其声明为 Unicode 字符串(在 Python 2 中,不带 u 前缀的中文字面量是字节串,会被判定为不兼容 XML),即可规避该错误:

p = document.add_paragraph(u"哈哈 ")

看到有同学在问:如果要添加的是字符串变量该怎么办?以下是当时写的一个自动生成 PbootCMS 漏洞报告的函数,其中 title_1 是中文的网站名称,经过处理后最终写入文档。关键做法是用 Unicode 字符串进行拼接:weak_des = u" " + title + u";" + ip_info,大家可以参考:

# Matches every character that is NOT a CJK ideograph (U+4E00-U+9FA5),
# an ASCII digit or an ASCII letter.
_NON_TITLE_CHARS = re.compile(u"([^\u4e00-\u9fa5\u0030-\u0039\u0041-\u005a\u0061-\u007a])")

# Control characters (everything below 0x20 except tab, LF and CR) that make
# the XML layer raise "ValueError: All strings must be XML compatible".
_XML_CONTROL_CHARS = re.compile(r"[\x00-\x08\x0b\x0c\x0e-\x1f]")

# Substrings that mark a page title as an error page instead of a site name.
_BAD_TITLE_MARKERS = ("403", "没有", "Not", "Nginx", "503", "500", "抱歉")

_FONT_NAME = u'宋体'


def _xml_safe(text):
    """Return *text* as ``str`` with XML-incompatible control characters removed."""
    return _XML_CONTROL_CHARS.sub("", str(text))


def _apply_font(run):
    """Set the run's font name, including the East Asian font slot."""
    # font.name must be assigned first: it creates the rPr/rFonts element
    # that the East Asian attribute is then written to.
    run.font.name = _FONT_NAME
    run._element.rPr.rFonts.set(qn('w:eastAsia'), _FONT_NAME)


def _add_heading(document, text, level):
    """Append a heading of the given level whose text is a font-styled run."""
    run = document.add_heading('', level=level).add_run(text)
    _apply_font(run)
    return run


def _format_body(paragraph, run):
    """Apply the report's body layout to *paragraph* and its *run*."""
    # Raw length value; python-docx lengths are EMU, so this should be
    # 32 pt (12700 EMU per point) -- kept as the original integer.
    paragraph.paragraph_format.first_line_indent = 406400
    paragraph.paragraph_format.line_spacing = Pt(25)
    run.font.size = Pt(12)
    _apply_font(run)


def pbootcms(url, vul_payload, title_1, ip_info, lujing, vulname):
    """Generate a .docx report for the PbootCMS default-database download issue.

    The report has three sections (description, details with a picture,
    remediation advice) and is saved as
    ``G:\\cnvd\\exp\\cnvd_report\\<YYYY-MM-DD><vulname>\\<title>.docx``.

    Args:
        url: Address of the affected site; used in the headline and details.
        vul_payload: Unused; kept so existing callers keep working.
        title_1: Page title of the site. Only CJK ideographs, ASCII digits and
            ASCII letters are kept. If the title looks like an error page or
            nothing is left after cleaning, the host name of ``url`` plus
            ``"_wrong"`` is used instead.
        ip_info: Free-form text appended to the description paragraph.
        lujing: Path of the picture inserted into the details section.
        vulname: Suffix appended to today's date to name the output directory.

    Raises:
        OSError: If the output directory cannot be created or the picture or
            document cannot be read or written.
    """
    raw_title = title_1 or ""
    cleaned_title = _NON_TITLE_CHARS.sub("", raw_title)
    looks_like_error_page = any(marker in raw_title for marker in _BAD_TITLE_MARKERS)
    if looks_like_error_page or not cleaned_title:
        # An empty cleaned title would yield a file literally named ".docx",
        # so it falls back to the host name just like an error-page title.
        host = parse.urlparse(url).netloc
        title = _NON_TITLE_CHARS.sub("", host) + "_wrong"
    else:
        title = cleaned_title
    print(title)

    # Caller-supplied text goes into the XML body; strip the control
    # characters that would otherwise abort the whole report.
    safe_url = _xml_safe(url)
    safe_ip_info = _xml_safe(ip_info)

    document = Document()
    _add_heading(document, safe_url + u"数据库文件下载 ", level=0)

    # Section 1: description. The fixed text is the paragraph's first run;
    # the site-specific suffix is a second, explicitly formatted run.
    _add_heading(document, u"一、漏洞描述 ", level=1)
    description = "PbootCMS v2.0.7 默认数据库下载,默认的数据库路径是/data/pbootcms.db,且data目录下没有进行任何的判断,后台也没有提供修改数据库路径的功能,所以可直接下载。"
    paragraph = document.add_paragraph(description)
    run = paragraph.add_run(u" " + title + u";" + safe_ip_info)
    _format_body(paragraph, run)
    run.font.bold = False

    # Section 2: details, followed by the picture.
    _add_heading(document, u"二、漏洞详情 ", level=1)
    paragraph = document.add_paragraph()
    run = paragraph.add_run(safe_url + "PbootCMS存在默认数据库文件下载漏洞")
    _format_body(paragraph, run)
    document.add_picture(lujing, width=Inches(6))

    # Section 3: remediation advice.
    _add_heading(document, u"三、加固建议 ", level=1)
    paragraph = document.add_paragraph()
    run = paragraph.add_run("建议进行系统升级")
    _format_body(paragraph, run)

    day_time = datetime.datetime.now().strftime('%Y-%m-%d')
    # Backslashes are escaped explicitly; "\c" and "\e" are invalid escape
    # sequences that newer Python versions warn about.
    out_dir = "G:\\cnvd\\exp\\cnvd_report\\" + day_time + vulname
    os.makedirs(out_dir, exist_ok=True)
    name = out_dir + '\\' + title + '.docx'
    print(name)
    document.save(name)
    print(title + "cnvd报告生成成功!!!")

# Example invocation with a Chinese site title; arguments are listed in the
# same order as the function signature.
pbootcms(
    url="http://www.xxx.com",
    vul_payload='hello',
    title_1="科技有限公司服务热线",
    ip_info="192.178.1.1_北京市_北京市",
    lujing="G:\\cnvd\\exp\\pic\\1bf134b0b8029460d939ab3f73c275dd.png",
    vulname="注入",
)

你可能感兴趣的:(技术积累,xml)