import jieba
def load_stopwords(path='cn_stopwords.txt'):
    """Read a stop-word file (one word per line) into a set.

    The file is decoded as UTF-8; the ``utf-8-sig`` codec transparently
    drops a leading byte-order mark if one is present, so the first word
    is never polluted by it. Each line is stripped of surrounding
    whitespace.

    Args:
        path: Location of the stop-word file.

    Returns:
        A set of the stripped lines, giving O(1) membership tests.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # Explicit encoding: the platform default (e.g. GBK on Windows) would
    # mis-decode a UTF-8 stop-word list. ``with`` guarantees the handle is closed.
    with open(path, encoding='utf-8-sig') as handle:
        return {line.strip() for line in handle}


def remove_stopwords(tokens, stopwords):
    """Return the tokens that are not stop words, preserving their order.

    Args:
        tokens: Any iterable of string tokens (a generator is fine).
        stopwords: Container of words to drop; a set is recommended.

    Returns:
        A list of the tokens that do not appear in ``stopwords``.
    """
    return [token for token in tokens if token not in stopwords]


if __name__ == "__main__":
    stop = load_stopwords('cn_stopwords.txt')
    s = "朝鲜半岛西北部古元古代高温变质-深熔作用:宏观和微观岩石学以及锆石U-Pb年代学制约"
    # Precise mode (cut_all=False); jieba.cut yields tokens lazily.
    segs = jieba.cut(s, cut_all=False)
    final = remove_stopwords(segs, stop)
    print(final)