笨方法python去除收藏夹重复收藏页

#!/usr/bin/python

# -*- coding: UTF-8 -*-

# Exported browser bookmarks to read, and the cleaned copy to write.
SOURCE_PATH = r"D:\Desktop\bookmarks.html"
TARGET_PATH = r"d:\Desktop\new_html.html"


def _is_bookmark_entry(line):
    """Return True if *line* contains a bookmark link (an ``<A ...>`` tag)."""
    return "<a " in line.lower()


def dedupe_bookmark_lines(lines):
    """Remove bookmark entries that repeat the previously kept entry.

    Each line is stripped of surrounding whitespace and split on ``>``.
    The second-to-last piece (for a bookmark line this is the link text
    followed by ``</A``) is used as the comparison key.

    * Lines that split into fewer than three pieces (blank lines, single
      tags such as ``<META ...>``) are always kept and do not change the
      remembered key.
    * Bookmark lines are skipped when their key equals the remembered key.
    * Any other line (folder headers, ``<DL><p>``, ``</DL><p>`` ...) is
      always kept and replaces the remembered key. These structural lines
      must never be dropped: consecutive ``</DL><p>`` lines close nested
      folders, and removing one leaves the document unbalanced.

    Only adjacent repeats are removed; the same bookmark appearing again
    later, after a different entry or a folder boundary, is kept.

    Args:
        lines: An iterable of text lines (an open text file works).

    Returns:
        A list of the kept lines, stripped, without trailing newlines.
    """
    kept = []
    previous_key = None
    for raw_line in lines:
        stripped = raw_line.strip()
        pieces = stripped.split(">")
        if len(pieces) < 3:
            # Plain tag or blank line: pass through untouched.
            kept.append(stripped)
            continue
        key = pieces[-2]
        if _is_bookmark_entry(stripped) and key == previous_key:
            # Same link text as the entry just kept: duplicate bookmark.
            continue
        kept.append(stripped)
        previous_key = key
    return kept


def main():
    """Read SOURCE_PATH, drop duplicate bookmarks, and write TARGET_PATH."""
    with open(SOURCE_PATH, "r", encoding="utf-8") as source:
        kept = dedupe_bookmark_lines(source)

    with open(TARGET_PATH, "w", encoding="utf-8") as target:
        target.writelines(line + "\n" for line in kept)


if __name__ == "__main__":
    main()

你可能感兴趣的:(笨方法python去除收藏夹重复收藏页)