from xpinyin import Pinyin
# Shared converter instance, created once at import time; deal_conversion
# calls pin.get_pinyin() on every tag value.
pin = Pinyin()
from collections import OrderedDict
def deal_conversion(tags):
    """
    Attach a ``conversion`` sort key to every tag.

    The key is the result of ``pin.get_pinyin`` applied to the tag's
    ``value``: Chinese names become hyphen-separated pinyin while English
    names come back unchanged (see the sample output at the end of this
    file).

    Each tag dict is updated in place; the returned list is a new list that
    holds those same dict objects in their original order.

    :param tags: iterable of tag dicts, each normally carrying a ``value`` key
    :return: list of the same tag dicts, each with a ``conversion`` key added
    """
    _tags = []
    for tag in tags:
        value = tag.get("value")
        # A tag whose value is missing (or None) has nothing to convert.
        # Passing None on to get_pinyin would fail, so give such a tag an
        # empty conversion instead; the key is then still present for
        # sorting by tag['conversion'].
        tag['conversion'] = "" if value is None else pin.get_pinyin(value)
        _tags.append(tag)
    return _tags
def sorted_by_conversion(tags):
    """
    Return the tags ordered by their ``conversion`` string.

    The comparison is ordinary string comparison, so the order is
    case-sensitive. The input is not modified, and tags with equal
    conversions keep their relative order.

    :param tags: iterable of tag dicts, each with a ``conversion`` key
    :return: new list of the same tag dicts in ascending conversion order
    """
    def conversion_of(tag):
        return tag['conversion']

    ordered = list(tags)
    ordered.sort(key=conversion_of)
    return ordered
def sort_by_capital(new_tags):
    """
    Group tags into index sections keyed by the first letter of ``conversion``.

    The result has this shape::

        tag_list = [
            {"class": "", "tags": [<the fixed "全部" entry>]},
            {"class": "A", "tags": [...]},
            {"class": "B", "tags": [...]},
            ...
            {"class": "#", "tags": [...]},
        ]

    * The first element is always the fixed header entry.
    * Letter sections follow in A-Z order; letters with no tags are omitted.
    * Tags whose conversion does not start with A-Z (after upper-casing)
      are collected in a final ``"#"`` section, which is omitted when empty.
    * Tags whose ``conversion`` is missing or empty are left out entirely.

    Within a section, tags keep the order they had in ``new_tags``.

    :param new_tags: iterable of tag dicts, normally already sorted by
        ``conversion``
    :return: list of section dicts as described above
    """
    capital_list = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    # One bucket per letter, looked up by key rather than through a separate
    # local variable per letter.
    buckets = {capital: [] for capital in capital_list}
    other = []

    # Drop each tag into the bucket of its initial letter.
    for tag in new_tags:
        conversion = tag.get("conversion")
        if not conversion:
            continue
        # Upper-case the whole string first, then take the first character.
        start = conversion.upper()[0]
        buckets.get(start, other).append(tag)

    # Fixed "all" header that leads every result.
    header = {
        "class": "",
        "tags": [
            {
                "id": "100000000000000000000003",
                "value": "全部"
            }
        ]
    }
    tag_list = [header]

    # capital_list is already in alphabetical order and each letter occurs
    # once, so no further sorting or de-duplication is needed.
    for capital in capital_list:
        if buckets[capital]:
            tag_list.append({"class": capital, "tags": buckets[capital]})

    # Digits and special characters go into a trailing "#" section.
    if other:
        tag_list.append({"class": "#", "tags": other})
    return tag_list
if __name__ == '__main__':
    # Demo run: build a handful of sample tags, push them through the three
    # processing steps and print the grouped result.
    sample_id = "5c810376dd40ba638423c623"
    # One (key, value, _type) row per sample tag; every tag shares sample_id.
    rows = [
        ("brand", "3.1 Phillip Lim", "brand"),
        ("star", "贾静雯", "star"),
        ("brand", "A Détacher", "brand"),
        ("brand", "Z Peace Treaty", "brand"),
        ("123 ", "#@$", "brand"),
        ("brand", "Spark", "brand"),
        ("star", "陈奕迅", "star"),
        ("star", "林青霞", "star"),
        ("brand", "Hello", "brand"),
        ("123 ", "Hello", "brand"),
    ]
    tags = [
        {"_id": sample_id, "key": key, "value": value, "_type": kind}
        for key, value, kind in rows
    ]
    # Pipeline, applied in order:
    #   1. deal_conversion      - add the pinyin "conversion" key to each tag
    #   2. sorted_by_conversion - order by that key using plain string
    #                             comparison
    #   3. sort_by_capital      - group into letter sections, with digits and
    #                             special characters under "#"
    for step in (deal_conversion, sorted_by_conversion, sort_by_capital):
        tags = step(tags)
    print(tags)
# 结果
# [{
# 'class': '',
# 'tags': [{
# 'id': '100000000000000000000003',
# 'value': '全部'
# }]
# }, {
# 'class': 'A',
# 'tags': [{
# '_id': '5c810376dd40ba638423c623',
# 'key': 'brand',
# 'value': 'A Détacher',
# '_type': 'brand',
# 'conversion': 'A Détacher'
# }]
# }, {
# 'class': 'C',
# 'tags': [{
# '_id': '5c810376dd40ba638423c623',
# 'key': 'star',
# 'value': '陈奕迅',
# '_type': 'star',
# 'conversion': 'chen-yi-xun'
# }]
# }, {
# 'class': 'H',
# 'tags': [{
# '_id': '5c810376dd40ba638423c623',
# 'key': 'brand',
# 'value': 'Hello',
# '_type': 'brand',
# 'conversion': 'Hello'
# }, {
# '_id': '5c810376dd40ba638423c623',
# 'key': '123 ',
# 'value': 'Hello',
# '_type': 'brand',
# 'conversion': 'Hello'
# }]
# }, {
# 'class': 'J',
# 'tags': [{
# '_id': '5c810376dd40ba638423c623',
# 'key': 'star',
# 'value': '贾静雯',
# '_type': 'star',
# 'conversion': 'jia-jing-wen'
# }]
# }, {
# 'class': 'L',
# 'tags': [{
# '_id': '5c810376dd40ba638423c623',
# 'key': 'star',
# 'value': '林青霞',
# '_type': 'star',
# 'conversion': 'lin-qing-xia'
# }]
# }, {
# 'class': 'S',
# 'tags': [{
# '_id': '5c810376dd40ba638423c623',
# 'key': 'brand',
# 'value': 'Spark',
# '_type': 'brand',
# 'conversion': 'Spark'
# }]
# }, {
# 'class': 'Z',
# 'tags': [{
# '_id': '5c810376dd40ba638423c623',
# 'key': 'brand',
# 'value': 'Z Peace Treaty',
# '_type': 'brand',
# 'conversion': 'Z Peace Treaty'
# }]
# }, {
# 'class': '#',
# 'tags': [{
# '_id': '5c810376dd40ba638423c623',
# 'key': '123 ',
# 'value': '#@$',
# '_type': 'brand',
# 'conversion': '#@$'
# }, {
# '_id': '5c810376dd40ba638423c623',
# 'key': 'brand',
# 'value': '3.1 Phillip Lim',
# '_type': 'brand',
# 'conversion': '3.1 Phillip Lim'
# }]
# }]