# python是个什么东西---python---python的字符处理

  
  
# -*- coding:UTF-8 -*-   
  
"""字符处理的工具: 
判断unicode是否是中文,日文,数字,英文,或者其他字符。 
全角符号转半角符号。"""  
  
def is_number(self, uchar=None):
    """Return True if ``uchar`` is an ASCII digit (U+0030..U+0039).

    This is a module-level function that was declared with a leading
    ``self`` parameter.  Both call shapes are accepted:
    ``is_number(uchar)`` and the legacy ``is_number(anything, uchar)``.
    """
    if uchar is None:
        # Single-argument call: the only argument is the character itself.
        uchar = self
    return u'\u0030' <= uchar <= u'\u0039'


def is_other(self, uchar=None):
    """Return True if ``uchar`` is not accepted by any other predicate.

    A character is "other" when ``is_unicode``, ``is_number`` and
    ``is_alphabet`` all reject it.  Accepts ``is_other(uchar)`` as well as
    the legacy ``is_other(anything, uchar)``.
    """
    if uchar is None:
        # Single-argument call: the only argument is the character itself.
        uchar = self
    # The predicates are plain module-level functions, so call them directly
    # rather than as attributes of the first argument.
    return not (
        is_unicode(None, uchar)
        or is_number(None, uchar)
        or is_alphabet(None, uchar)
    )


def is_alphabet(self, uchar=None):
    """Return True if ``uchar`` is an ASCII letter (A-Z or a-z).

    Accepts ``is_alphabet(uchar)`` as well as the legacy
    ``is_alphabet(anything, uchar)``.
    """
    if uchar is None:
        # Single-argument call: the only argument is the character itself.
        uchar = self
    return (u'\u0041' <= uchar <= u'\u005a') or (u'\u0061' <= uchar <= u'\u007a')


def is_unicode(self, uchar=None):
    """Return True if ``uchar`` lies in the range U+0800..U+9FA5.

    The original notes label the two sub-ranges as:
        U+4E00..U+9FA5  Chinese
        U+0800..U+4E00  Japanese
    Both are accepted, so this is broader than "is a Chinese character".
    NOTE(review): the lower sub-range also contains U+3000 (the full-width
    space handled by Q2B), so that character is not treated as "other"
    unless the text is converted to half-width first.

    Accepts ``is_unicode(uchar)`` as well as the legacy
    ``is_unicode(anything, uchar)``.
    """
    if uchar is None:
        # Single-argument call: the only argument is the character itself.
        uchar = self
    # The two sub-ranges share the endpoint U+4E00, so one span covers both.
    return u'\u0800' <= uchar <= u'\u9fa5'
  
def Q2B(uchar):
    """Convert one full-width character to its half-width equivalent.

    The ideographic space U+3000 maps to the ASCII space U+0020; any other
    character has 0xFEE0 subtracted from its code point.  If the result is
    outside U+0020..U+007E the input was not a full-width form, and
    ``uchar`` is returned unchanged.

    Args:
        uchar: a single character (``ord`` is applied to it).

    Returns:
        The half-width character, or ``uchar`` itself when no conversion
        applies.
    """
    inside_code = ord(uchar)
    if inside_code == 0x3000:
        inside_code = 0x0020
    else:
        inside_code -= 0xfee0
    # Not a half-width character after the shift: keep the original.
    if inside_code < 0x0020 or inside_code > 0x7e:
        return uchar
    try:
        return unichr(inside_code)  # Python 2
    except NameError:
        return chr(inside_code)  # Python 3 has no unichr; chr covers it
  
def stringQ2B(ustring):
    """Return ``ustring`` with ``Q2B`` applied to every character."""
    half_width_chars = map(Q2B, ustring)
    return "".join(half_width_chars)
  
def uniform(ustring):
    """Normalise a string: full-width to half-width, then lower-case."""
    half_width = stringQ2B(ustring)
    return half_width.lower()
  
def string2List(ustring):
    """Split ``ustring`` into runs of characters that ``is_other`` rejects.

    Characters for which ``is_other`` is true act as separators and are
    dropped; consecutive separators never produce empty strings.

    Returns:
        A list of the joined runs, in order of appearance.
    """
    pieces = []
    current = []
    for ch in ustring:
        if not is_other(ch):
            current.append(ch)
        elif current:
            # A separator ends the run collected so far.
            pieces.append("".join(current))
            current = []
    # Flush the final run if the string did not end with a separator.
    if current:
        pieces.append("".join(current))
    return pieces
  
if __name__ == "__main__":
    # Disabled round-trip check for Q2B and B2Q (B2Q is not defined in the
    # visible part of this file):
    #     for i in range(0x0020, 0x007F):
    #         print Q2B(B2Q(unichr(i))), B2Q(unichr(i))

    # Smoke test for uniform() and string2List().
    ustring = u'中国 人名a高频A'
    ustring = uniform(ustring)
    ret = string2List(ustring)
    # Parenthesised so the line parses on both Python 2 and Python 3.
    print(ret)

# 你可能感兴趣的:(工作,python)