# Tally of True results from has_top_words; starts at zero.
count = 0
# Rank obama_words by 'count' (largest first), keep the 'word' values of the
# first five rows, and hold them as a set for subset tests.
common_words = set(obama_words.sort('count', ascending=False)['word'][:5])
def has_top_words(word_count_vector):
    """Report whether one row's keys include every entry of ``common_words``.

    Args:
        word_count_vector: dict-like object; only its keys are read
            (presumably a word -> count mapping -- confirm against caller).

    Returns:
        True if ``common_words`` is a subset of the keys, False otherwise.

    Side effects:
        Adds 1 to the module-level ``count`` each time the result is True.
    """
    # ``count`` is defined at module level and is rebound below, so it must be
    # declared global; otherwise Python would treat it as a local name.
    global count
    # Only the keys matter; a set makes the containment check a set operation.
    unique_words = set(word_count_vector.keys())
    contains_all = common_words.issubset(unique_words)
    if contains_all:
        count += 1
    # NOTE(review): the tally is only right if the caller invokes this exactly
    # once per row, in this same process -- confirm for the ``apply`` in use.
    return contains_all
# Run has_top_words over the 'word_count' column and store each result in a
# new 'has_top_words' column of wiki.
word_count_column = wiki['word_count']
wiki['has_top_words'] = word_count_column.apply(has_top_words)
# Show the module-level tally that has_top_words updates.
print(count)
上述代码中,函数 has_top_words 需要修改全局变量 count。count 定义在函数之外,因此在函数内部对它赋值之前,必须先用 `global count` 声明;否则 Python 会把 count 当作局部变量,在赋值语句读取它时抛出 UnboundLocalError。
代码运行结果如下: