Python - 字符处理

文章目录

      • 提取不同类型字符


提取不同类型字符

转载自:https://www.cnblogs.com/zihe/p/8323508.html

import string
from collections import namedtuple

# Code-point ranges counted as Chinese (Han ideographs).
_HAN_RANGES = (
    (0x3400, 0x4DBF),    # CJK Unified Ideographs Extension A
    (0x4E00, 0x9FFF),    # CJK Unified Ideographs
    (0xF900, 0xFAFF),    # CJK Compatibility Ideographs
    (0x20000, 0x2FA1F),  # supplementary-plane extensions and compatibility supplement
    (0x30000, 0x323AF),  # CJK Unified Ideographs Extensions G-H
)

# Result record, created once rather than on every call.
_Count = namedtuple('Count', ['total', 'zh', 'en', 'space', 'digit', 'punc'])


def _is_han(c):
    """Return True if the single character ``c`` falls in a Han ideograph range."""
    code_point = ord(c)
    return any(low <= code_point <= high for low, high in _HAN_RANGES)


def str_count(s):
    """Count Chinese, letter, whitespace, digit and other characters in ``s``.

    Every character is assigned to exactly one category, checked in this
    order:

    * ``en``    -- ASCII letters, plus any other alphabetic character that is
      not a Han ideograph (e.g. accented Latin letters such as "é" or "ü").
    * ``digit`` -- characters for which ``str.isdigit()`` is true.
    * ``space`` -- characters for which ``str.isspace()`` is true.
    * ``zh``    -- alphabetic characters inside a Han ideograph range.
    * ``punc``  -- everything else (punctuation, symbols, ...).

    Args:
        s: The string to analyse.

    Returns:
        A ``Count`` namedtuple ``(total, zh, en, space, digit, punc)`` where
        ``total`` is ``len(s)`` and equals the sum of the other five fields.
    """
    count_en = count_dg = count_sp = count_zh = count_pu = 0

    for c in s:
        if c in string.ascii_letters:
            count_en += 1
        elif c.isdigit():
            count_dg += 1
        elif c.isspace():
            count_sp += 1
        elif c.isalpha():
            # isalpha() is true for letters of every script, so it cannot by
            # itself identify Chinese; only Han ideographs are counted as zh.
            if _is_han(c):
                count_zh += 1
            else:
                count_en += 1
        else:
            count_pu += 1

    # Each character increments exactly one counter above, so the categories
    # always add up to len(s); no consistency check is required.
    return _Count(len(s), count_zh, count_en, count_sp, count_dg, count_pu)

def test1():
    """Run ``str_count`` on a sample string and print the per-category counts."""
    # Mixed sample: 32 characters = 7 Chinese + 4 English letters + 3 spaces
    # + 11 digits + 7 punctuation marks.
    sample = "这是一个test字符串32,7% 44,5% 58,2% 62%"
    # Alternative samples containing accented Latin letters:
    # sample = 'En cas d’incertitude, consultez votre médecin ou votre pharmacien.'
    # sample = 'Güterverkehr 1987 — Binnenwasserstraßen 1989. 208 S. (ES/DA/DE/GR/EN/FR/IT/NUPT) CA-55-89-770-9A-C ECU'
    report = '该字符串共有 {} 个字符,其中有 {} 个汉字,{} 个英文,{} 个空格,{} 个数字,{} 个标点符号。'
    stats = str_count(sample)

    # Echo the input followed by one blank line, then the summary sentence.
    print(sample)
    print()
    print(report.format(
        stats.total,
        stats.zh,
        stats.en,
        stats.space,
        stats.digit,
        stats.punc,
    ))


你可能感兴趣的:(Python)