def word_count(file_name, encoding=None):
    """Count how often each whitespace-separated token occurs in a text file.

    Args:
        file_name: Path of the text file to read.
        encoding: Text encoding handed to ``open``. The default ``None``
            keeps the platform's default encoding, as before.

    Returns:
        A ``collections.defaultdict(int)`` mapping each token to the number
        of times it appears in the file.
    """
    import collections

    word_freq = collections.defaultdict(int)
    with open(file_name, encoding=encoding) as f:
        for line in f:
            # split() without arguments already ignores leading and trailing
            # whitespace (including the newline), so no strip() is needed.
            for word in line.split():
                word_freq[word] += 1
    return word_freq
def build_dict(file_name, min_word_freq=10):
    """Append the frequent words of a text file to ``data/voacb.txt``.

    Words are counted with ``word_count``; only those occurring strictly
    more than ``min_word_freq`` times are kept. The kept words are ordered
    by descending count, ties broken by the word itself in ascending string
    order, and written one per line with no trailing newline.

    Args:
        file_name: Path of the text file to scan.
        min_word_freq: Exclusive lower bound on a word's count; a word seen
            exactly ``min_word_freq`` times is dropped.
    """
    word_freq = word_count(file_name)
    frequent = [item for item in word_freq.items() if item[1] > min_word_freq]
    frequent.sort(key=lambda item: (-item[1], item[0]))
    # Extract the words with a comprehension rather than unpacking
    # zip(*pairs): the unpacking raises ValueError when no word passes the
    # threshold, whereas this simply yields an empty list.
    words = [word for word, _ in frequent]
    # NOTE(review): the file is opened in append mode and no trailing newline
    # is written, so a second call puts its first word directly after the
    # last word already in the file. Confirm whether 'w' was intended. The
    # file name spelling is kept as-is because other code may read this path.
    with open(r"data/voacb.txt", 'a') as f:
        f.write('\n'.join(words))