Q1
Using a stack to evaluate fully parenthesised expressions.
Possible interaction is shown below:
$ python3
...
>>> from fully_parenthesised import *
>>> evaluate(’100’)
100
>>> evaluate(’[(1 - 20) + 300]’)
281
>>> evaluate(’[1 - {20 + 300}]’)
-319
>>> evaluate(’( { 20*4 }/5 )’)
16.0
>>> evaluate(’(20*[4/5])’)
16.0
>>> evaluate(’({1 + (20 * 30)} - [400 / 500])’)
600.2
>>> evaluate(’{1 + [((20*30)-400) / 500]}’)
1.4
>>> evaluate(’[1 + {(2 * (3+{4*5})) / ([6*7]-[8/9]) }]’)
2.1189189189189186
>>> evaluate(’100 + 3’)
>>> evaluate(’(100 + 3’)
>>> evaluate(’(100 + -3)’)
>>> evaluate(’(100 50)’)
>>> evaluate(’(100 / 0)’)
思路是先检查输入的expression,将合法字符以string的格式按顺序保存在一个list中。对list中的每个元素顺序处理,如果是数字或者运算符则直接压入栈中,如果是任何的右括号,则弹出栈中前三项进行计算。计算前先检查这三项是否是数字+运算符+数字的合法输入,如果不是则return空,如果是则计算相应数值再压入栈中。直到栈为空,结束处理,返回当前栈中最后的一个数字。
import re
from operator import add, sub, mul, truediv
from stack_adt import Stack, EmptyStackError
def evaluate(expression):
'''
Checks whether an expression is a full parenthesised expression,
and in case the answer is yes, returns the value of the expression,
provided that no division by 0 is attempted; otherwise, return None.
>>> evaluate('100')
100
>>> evaluate('[(1 - 20) + 300]')
281
>>> evaluate('[1 - {20 + 300}]')
-319
>>> evaluate('( { 20*4 }/5 )')
16.0
>>> evaluate('(20*[4/5])')
16.0
>>> evaluate('({1 + (20 * 30)} - [400 / 500])')
600.2
>>> evaluate('{1 + [((20*30)-400) / 500]}')
1.4
>>> evaluate('[1 + {(2 * (3+{4*5})) / ([6*7]-[8/9]) }]')
2.1189189189189186
>>> evaluate('100 + 3')
>>> evaluate('(100 + 3')
>>> evaluate('(100 + -3)')
>>> evaluate('(100 50)')
>>> evaluate('(100 / 0)')
'''
if any(not (c.isdigit() or c.isspace() or c in '+-*/()[]{}') for c in expression):
return
#如果表达式中含有数字、空格或者+-*/()[]{}之外的内容,则无法计算,返回void
tokens = re.compile('(\d+|\+|-|\*|/|\(|\)|\[|\]|\{|\})').findall(expression)
#将表达式中所有的数字+-*/()[]{}识别出来,每一个形成一个string,放入tokens的list中
try:
value = evaluate_expression(tokens)
return value
except ZeroDivisionError:
return
def evaluate_expression(tokens):
stack = Stack()
for token in tokens:
try:
if token in '+-*/':
stack.push(token)
else:
token = int(token)
stack.push(token)
#把所有的数字和运算符号压栈
except ValueError:
if token in ')]}':
try:
arg_2 = stack.pop()
operator = stack.pop()
arg_1 = stack.pop()
if operator not in '+-*/' or str(arg_1) in '+-*/' or str(arg_2) in '+-*/':
return
stack.push({'+': add, '-': sub, '*': mul, '/': truediv}[operator](arg_1, arg_2))
except EmptyStackError:
return
#如果token是任何一个括号的结尾,则代表要进行一次四则运算,弹出栈中前三项,分别是第二个运算数字、运算符和第一个运算数字
if stack.is_empty():
return
value = stack.pop()
if not stack.is_empty():
return
return value
if __name__ == '__main__':
import doctest
doctest.testmod()
Q2
Write a program word_ladder.py that computes all transformations of a word word_1 into a word word_2, consisting of sequences of words of minimal length, starting with word_1, ending in word_2, and such that two consecutive words in the sequence differ by at most one letter. All words have to occur in a dictionary with name dictionary.txt, stored in the working directory.
It is convenient and effective to first create a dictionary whose keys are all words in the dictionary with one letter replaced by a “slot”, the value for a given key being the list of words that match the key with the “slot” being replaced by an appropriate letter. From this dictionary, one can then build a dictionary with words as keys, and as value for a given key the list of words that differ in only one letter from the key.
The program implements a function word_ladder(word_1, word_2) that returns the list of all solutions, a solution being as previously described.
Next is a possible interaction.
$ python3
...
>>> from word_ladder import *
>>> for ladder in word_ladder(’cold’, ’warm’): print(ladder)
...
[’COLD’, ’CORD’, ’WORD’, ’WORM’, ’WARM’]
[’COLD’, ’CORD’, ’WORD’, ’WARD’, ’WARM’]
[’COLD’, ’CORD’, ’CARD’, ’WARD’, ’WARM’]
>>> for ladder in word_ladder(’three’, ’seven’): print(ladder)
...
[’THREE’, ’THREW’, ’SHREW’, ’SHRED’, ’SIRED’, ’SITED’, ’SATED’, ’SAVED’, ’SAVER’, ’SEVER’, ’SEVEN’]
这个题目Eric的思路非常巧妙,我在自己解答的时候不能很好的解决双循环遍历字典用时过长的问题。所以此处贴出对Eric的代码分析:
# Written by Eric Martin for COMP9021
from collections import defaultdict, deque
import sys
'''
Computes all transformations from a word word_1 to a word word_2, consisting of
sequences of words of minimal length, starting with word_1, ending in word_2,
and such that two consecutive words in the sequence differ by at most one letter.
All words have to occur in a dictionary with name dictionary.txt, stored in the
working directory.
'''
dictionary_file = 'dictionary.txt'
def get_words_and_word_relationships():
try:
with open(dictionary_file) as dictionary:
lexicon = set()
contextual_slots = defaultdict(list)
for word in dictionary:
word = word.rstrip()
#去除单词后面的空格
lexicon.add(word)
#将所有单词存入lexicon的set中
for i in range(len(word)):
contextual_slots[word[: i], word[i + 1: ]].append(word)
#每一个word,以删除一个字母的剩余的前后部分构成的一个tuple做key,word做value,构建一个contextual_slots的dict
#例如:('A', 'RON'): ['AARON', 'AKRON', 'APRON']
#dict是hash的,效率高,string拆分效率也很高
closest_words = defaultdict(set)
for slot in contextual_slots:
#遍历所有的slot,他们的特点是元组里都缺了一个字母,但是这些slot的字典value都是彼此相差一个字母的
for i in range(len(contextual_slots[slot])):
#遍历所有slot字典value的list中的word,i作为第一个备选单词
for j in range(i + 1, len(contextual_slots[slot])):
#遍历所有slot字典value中i之后list中的word,j作为第二个备选单词,i与j成为只差一个字母的一对单词
closest_words[contextual_slots[slot][i]].add(contextual_slots[slot][j])
closest_words[contextual_slots[slot][j]].add(contextual_slots[slot][i])
#在closest_words字典中i的key后面加入j的value,j的key后面加入i的value
#例如:'APE': {'ABE', 'ACE', 'AGE', 'ALE', 'APT', 'ARE', 'ATE', 'AWE', 'AYE'}
return lexicon, closest_words
except FileNotFoundError:
print(f'Could not open {dictionary_file}. Giving up...')
sys.exit()
def word_ladder(word_1, word_2):
lexicon, closest_words = get_words_and_word_relationships()
word_1 = word_1.upper()
word_2 = word_2.upper()
#将输入的单词全部大写,因为字典中都是大写的WORD,避免key value error
if len(word_1) != len(word_2) or word_1 not in lexicon or word_2 not in lexicon:
return []
if word_1 == word_2:
return [[word_1]]
solutions = []
queue = deque([[word_1]])
#从word_1开始建立一个queue,用deque的原因是契合这里的前后队列操作,效率高,且队列里存放的是单词变换的一个list,不是某一个单词
while queue:
word_sequence = queue.pop()
#取出一个单词变换过程的list
last_word = word_sequence[-1]
#取出变换list的最后一个单词
for word in closest_words[last_word]:
#基于最后一个单词,找出所有和它差一个字母的单词
if word == word_2:
#如果word和目标word_2一致,意味着已经找到了一个解
if not solutions or len(solutions[-1]) > len(word_sequence):
solutions.append(word_sequence + [word])
#如果solutions为空,或者solutions最后一个解的长度大于当前找到的解的长度减1的数值,则把新找到的解加入solution中
#这里巧妙的地方在于len(solutions[-1]) > len(word_sequence)允许变换长度一致的解被加入solution中
if not solutions and word not in word_sequence:
queue.appendleft(word_sequence + [word])
#如果还没有形成解,则在队列左侧加入新一次变换后的list。
#放在左侧是BFS的思路,把所有未形成解的变换全部处理完,再处理新的变换。
#也就是说长度从小到大,遍历每个长度上所有可能,由此找到最短的变换,而不是找到所有从word1到word2的方法
return solutions