python strip_tags 支持保留指定标签

#coding:utf-8



import re



def strip_tags(string, allowed_tags=''):
    """Remove markup tags from ``string``, optionally keeping some of them.

    Args:
        string: Text that may contain ``<...>`` tags.
        allowed_tags: Comma-separated tag names to keep, e.g. ``'b,i,a'``.
            Whitespace around names is ignored and empty entries are
            skipped. Names are compared case-insensitively and must match
            the whole tag name, so ``'b'`` keeps ``<b>``/``</b>`` but not
            ``<br>``. An empty value (or ``None``) removes every tag.

    Returns:
        ``string`` with all tags that are not allowed removed. Text between
        tags is left untouched.

    Note:
        Tags are located with a simple ``<...>`` regular expression that
        ends at the first ``>``; a ``>`` inside an attribute value will cut
        the tag short.
    """
    # Normalise the whitelist. Empty names must be dropped: an empty name
    # would turn into a pattern that matches (and therefore keeps) every tag.
    names = [name.strip() for name in (allowed_tags or '').split(',')]
    names = [re.escape(name) for name in names if name]

    if not names:
        # If no allowed tags, remove all.
        return re.sub(r'<[^>]*?>', '', string)

    # The lookahead requires the tag name to end right after the allowed
    # name (whitespace, '/' or '>'), which prevents prefix matches such as
    # 'b' allowing '<br>'.
    allowed = re.compile(
        r'</?(?:' + '|'.join(names) + r')(?=[\s/>])', re.I)

    def _keep_if_allowed(match):
        # Keep the tag verbatim when it is whitelisted, otherwise drop it.
        tag = match.group(0)
        return tag if allowed.match(tag) else ''

    # Single pass over every tag instead of one substitution per tag.
    return re.sub(r'<[^>]+>', _keep_if_allowed, string)

  

你可能感兴趣的:(python)