import re
# Certain special second-level unit names that start with "省" and therefore
# cannot be peeled off by the generic suffix scan.
_SPECIAL_SECOND_LEVEL_UNITS = ("省直辖行政区划",)

# Suffix characters tried, in this order, after the province level.
_UNIT_SUFFIXES = "市州县区"


def _take_units(suffix, address):
    """Peel consecutive leading units that end in *suffix* off *address*.

    A unit is the text from the start of *address* up to and including the
    first occurrence of *suffix*, provided at least one character precedes
    the suffix.  Extraction repeats on the remainder until no unit is found.

    Args:
        suffix: The terminating character of a unit, e.g. ``"省"``.
        address: The text to scan.

    Returns:
        A ``(units, remainder)`` tuple: the units found in order, and the
        text left once they have been removed.
    """
    units = []
    # Iterative on purpose: one recursive call per unit would hit the
    # interpreter recursion limit on inputs containing very many units.
    while address:
        end = address.find(suffix)
        # -1: the suffix does not occur; 0: the text starts with the suffix
        # itself, so there is no name in front of it to form a unit.
        if end <= 0:
            break
        unit = address[:end + 1]
        units.append(unit)
        # NOTE: every occurrence of the unit is removed, not only the leading
        # one, so an immediately repeated unit is reported a single time.
        address = address.replace(unit, "")
    return units, address


def cut_address(address):
    """Split an administrative address string into its component units.

    Province-level units (ending in "省") are extracted first, then any
    whitelisted special unit names, then units ending in "市", "州", "县"
    and "区", each suffix being processed in that order.  Text left over
    after the last pass is discarded.

    Args:
        address: The address text.  A falsy value yields an empty list.

    Returns:
        The list of extracted unit names, in extraction order.

    Example:
        >>> cut_address("湖北省咸宁市咸安区")
        ['湖北省', '咸宁市', '咸安区']
    """
    if not address:
        return []

    parts, rest = _take_units("省", address)

    for name in _SPECIAL_SECOND_LEVEL_UNITS:
        if name in rest:
            parts.append(name)
            rest = rest.replace(name, "")

    for suffix in _UNIT_SUFFIXES:
        units, rest = _take_units(suffix, rest)
        parts.extend(units)

    return parts
# 湖北省恩施土家族苗族自治州咸丰县 ['湖北省', '恩施土家族苗族自治州', '咸丰县']
# 湖北省咸宁市咸安区 ['湖北省', '咸宁市', '咸安区']
# 湖北省随州市曾都区 ['湖北省', '随州市', '曾都区']
# 湖北省咸宁市赤壁市 ['湖北省', '咸宁市', '赤壁市']
# 湖北省襄阳市保康县 ['湖北省', '襄阳市', '保康县']
# 湖北省宜昌市点军区 ['湖北省', '宜昌市', '点军区']