1. nbtutor module

nbtutor用来演示python指令的执行过程，但它对一些local、nonlocal、
global variable的演示有小错误，大体上可以参考。安装和使用说明如下：
（1）PIP安装

pip install nbtutor
jupyter nbextension install --overwrite --py nbtutor
jupyter nbextension enable --py nbtutor

（2）conda安装

conda install -c conda-forge nbtutor

（3）Jupyter Notebook使用

%load_ext nbtutor

%%nbtutor
代码

注意：load_ext nbtutor要先执行，和import不一样；另外，安装好后要重启jupyter notebook

2. local, nonlocal, global variable

%%nbtutor
a = 0
m = 0
n = 0
x = 0
# All four names above are global variables. As described in the earlier notes,
# assignment does not create a separate stored value for each name: a name
# (v_name) is simply bound to an existing object (v_storage), here the int 0.
# So these four statements give four names that all refer to one single 0 object.

print('PRINT 0:  a =', a,
      ' m =', m, ' n =', n,
      '        x =', x)
# Prints the values of the global variables.
#>>> PRINT 0:  a = 0  m = 0  n = 0         x = 0

def f_1():
    """Outer function of the scope demo: binds locals m, x, y, z and rebinds global n."""
    # Everything in Python is an object, and so is the function f_1.
    m = 1
    global n
    n = 1
    x = 1
    y = 1
    z = 1
    # Except for n, every name assigned above is a local variable of f_1: it
    # exists only while a call to f_1 is active and is meaningless afterwards.
    # n is declared global, so it does not belong to f_1.
    print('PRINT 1:  a =', a,
          ' m =', m, ' n =', n,
          '        x =', x, ' y =', y, ' z =', z)
    # Prints the four locals of f_1 (m, x, y, z) and the two globals a and n.
    # A name is looked up in the local scope first; if it is not bound there,
    # the global scope is searched.
    #>>> PRINT 1:  a = 0  m = 1  n = 1         x = 1  y = 1  z = 1

    def f_2():
        """Middle function: rebinds globals m, n, p and f_1's y; x is its own local."""
        global m
        m = 2
        # Cannot write:
        # nonlocal n
        # nonlocal searches the enclosing function scopes, level by level, but
        # never reaches the global scope. f_1 has no n of its own (it declared
        # n global), so n only exists at global level and nonlocal is an error.
        global n
        n = 2
        global p
        # Declaring global a name that does not exist yet at module level makes
        # the assignment below create it. Best avoided: hard to follow in
        # larger programs.
        p = 2
        x = 2
        nonlocal y
        y = 2
        # Cannot write:
        # nonlocal u
        # because no enclosing function scope binds a variable u.
        print('PRINT 2:  a =', a,
              ' m =', m, ' n =', n, ' p =', p,
              ' x =', x, ' y =', y, ' z =', z)
        # Prints the globals a, m, n, p; f_1's y and z; and f_2's own x.
        #>>> PRINT 2:  a = 0  m = 2  n = 2  p = 2  x = 2  y = 2  z = 1

        def f_3():
            """Innermost function: rebinds f_2's x and f_1's y and z via nonlocal."""
            nonlocal x
            x = 3
            # Rebinds f_2's x to 3, not f_1's x: same name, different variables.
            nonlocal y
            y = 3
            # f_2 declared y nonlocal, so the y visible in f_2 is f_1's y;
            # this assignment therefore rebinds f_1's y (as seen from f_2 too).
            nonlocal z
            z = 3
            # f_2 does not bind z itself, so the search continues outwards to
            # f_1, whose z is rebound to 3.
            print('PRINT 3:  a =', a,
                  ' m =', m, ' n =', n, ' p =', p,
                  ' x =', x, ' y =', y, ' z =', z)
            # Prints the globals a, m, n, p; f_2's x; and f_1's y and z.
            #>>> PRINT 3:  a = 0  m = 2  n = 2  p = 2  x = 3  y = 3  z = 3

        f_3()
        print('PRINT 4:  a =', a,
              ' m =', m, ' n =', n, ' p =', p,
              ' x =', x, ' y =', y, ' z =', z)
        # This print runs in f_2: globals a, m, n, p; f_2's x; and f_1's y and
        # z, all three of x, y, z having been rebound by f_3.
        #>>> PRINT 4:  a = 0  m = 2  n = 2  p = 2  x = 3  y = 3  z = 3

    f_2()
    print('PRINT 5:  a =', a,
          ' m =', m, ' n =', n, ' p =', p,
          ' x =', x, ' y =', y, ' z =', z)
    # This print runs in f_1: globals a, n, p; and f_1's own m, x, y, z.
    #>>> PRINT 5:  a = 0  m = 1  n = 2  p = 2  x = 1  y = 3  z = 3

f_1()
print('PRINT 6:  a =', a,
      ' m =', m, ' n =', n, ' p =', p,
      ' x =', x)
# Module level: prints the globals a, m, n, p, x; y and z are not defined here.
#>>> PRINT 6:  a = 0  m = 2  n = 2  p = 2  x = 0

local是在当前object level；
nonlocal是向上找object level但是不能无限向上到global level；
global level是最外层

"The nonlocal statement causes the listed identifiers to refer to previously bound variables in the nearest enclosing scope. This is important because the default behavior for binding is to search the local namespace first. The statement allows encapsulated code to rebind variables outside of the local scope besides the global (module) scope.

Names listed in a nonlocal statement, unlike those listed in a global statement, must refer to pre-existing bindings in an enclosing scope (the scope in which a new binding should be created cannot be determined unambiguously).

Names listed in a nonlocal statement must not collide with pre-existing bindings in the local scope." (https://stackoverflow.com/questions/33211272/what-is-the-difference-between-non-local-variable-and-global-variable)

3. file operation

3.1 create directory

import os
import sys

# Existing directory, relative to the current working directory.
original_directory = 'names'
# Directory to create next to it: the original name plus '_classified'.
new_directory = f'{original_directory}_classified'

# os.path.isdir(path) is True when path is an existing directory; in that
# case nothing is created and the script stops.
if os.path.isdir(new_directory):
    print(f'{new_directory} already exists, leaving it there.')
    sys.exit()

# Create the new directory, then one subdirectory per gender inside it.
os.mkdir(new_directory)
male_directory, female_directory = (
    f'{new_directory}/{label}' for label in ('males', 'females')
)
os.mkdir(male_directory)
os.mkdir(female_directory)

import os，使用其中的常见函数：
os.path.isfile(path)
Return True if path is an existing regular file. 注意这个函数是用来判断是不是file的；
os.path.isdir(path)
Return True if path is an existing directory.注意这个函数是用来判断是不是directory的；

3.2 file operation

# Go through every entry of the prepared directory, keeping only .txt files.
for filename in os.listdir(original_directory):
    if filename.endswith('.txt'):
        # Open three files at once: the source file, plus a file of the same
        # name to be written in each of the male and female directories.
        with open(f'{original_directory}/{filename}') as names, \
                open(f'{male_directory}/{filename}', 'w') as male_names, \
                open(f'{female_directory}/{filename}', 'w') as female_names:
            for line in names:
                # Each line is split on commas into exactly three fields.
                name, gender, count = line.split(',')
                # Route the record by gender and drop the gender field; count
                # is written as read, with whatever line ending it carries.
                target = female_names if gender == 'F' else male_names
                target.write(f'{name},{count}')

3.3 information retrieval

（1）review dict和defaultdict的区别

D = {}
# With a plain dict the key has to be assigned first ...
D['paul'] = [1887]
# ... and only then can its list be appended to.
D['paul'].append(1896)
D
>>>
{'paul': [1887, 1896]}

from collections import defaultdict
D = defaultdict(list)
# No prior assignment is needed: looking up the missing key 'paul' makes the
# defaultdict create an empty list for it, which is then appended to.
D['paul'].append(1887)
D['paul'].append(1896)
D
>>>
defaultdict(list, {'paul': [1887, 1896]})

看起来上述dict和defaultdict的操作实现了同样的功能，但是defaultdict能够实现不存在的key的添加，如下

D = {}
# Raises KeyError: 'paul' is not a key of the plain dict, so there is no list
# to append to (the lines below are never reached).
D['paul'].append(1887)
D['paul'].append(1896)
D

如果使用dict，'paul'不是D的key，D['paul']会抛出KeyError，因此无法append；而defaultdict可以。可以简单将defaultdict理解成：访问不存在的key时，会自动按事先指定的类型为该key创建默认value的dict。例如，D = defaultdict(list)，在访问不存在的key时会自动为该key创建一个空list作为value。

（2）找寻一个名字出现在记录中的所有年份

import os
import sys
from collections import defaultdict

original_directory = 'names'

# Maps each first name to the list of years in which it was recorded; being a
# defaultdict(list), a name seen for the first time starts with an empty list.
years_by_name = defaultdict(list)
# os.listdir() returns entries in arbitrary order. Sorting the file names
# (all of the form 'yob1880.txt') processes the years chronologically, so each
# name's list of years is built in increasing order.
for filename in sorted(os.listdir(original_directory)):
    if not filename.endswith('.txt'):
        continue
    # Characters 3 to 6 of 'yobYYYY.txt' are the four digits of the year.
    year = int(filename[3: 7])
    with open(original_directory + '/' + filename) as names:
        for line in names:
            name, gender, count = line.split(',')
            # Stop at the first male record: only female names are collected.
            # NOTE(review): this assumes every file lists all its 'F' records
            # before any 'M' record - confirm against the data files.
            if gender == 'M':
                break
            years_by_name[name].append(year)
print(years_by_name['Helen'])

（3）找寻一个名字在记录中gap最大的记录

import os
import sys
from collections import defaultdict

original_directory = 'names'

# Maps each first name to the list of years in which it was recorded.
years_by_name = defaultdict(list)
# os.listdir() returns entries in arbitrary order. The gap computation below
# needs each name's years in increasing order, so the file names (all of the
# form 'yob1880.txt') are sorted to process the years chronologically.
for filename in sorted(os.listdir(original_directory)):
    if not filename.endswith('.txt'):
        continue
    # Characters 3 to 6 of 'yobYYYY.txt' are the four digits of the year.
    year = int(filename[3: 7])
    with open(original_directory + '/' + filename) as names:
        for line in names:
            name, gender, count = line.split(',')
            # Stop at the first male record: only female names are collected.
            # NOTE(review): this assumes every file lists all its 'F' records
            # before any 'M' record - confirm against the data files.
            if gender == 'M':
                break
            years_by_name[name].append(year)
# At this point years_by_name maps every collected name to the increasing list
# of years in which it appears; print(years_by_name['Helen']) shows one entry.

# One (gap, year, name) tuple for every pair of consecutive years of every
# name: gap is the distance between the two years and year is the earlier one.
# zip(years, years[1:]) stops one short of the end, since the last year has no
# successor to form a gap with.
data = [(later - earlier, earlier, name)
        for name, years in years_by_name.items()
        for earlier, later in zip(years, years[1:])
       ]

# Tuples compare field by field, so sorting in reverse order ranks by gap
# first, largest gap first; the ten largest gaps are printed.
# NOTE(review): "may" in the message is presumably a typo for "many"; it is
# left unchanged so that previously recorded output still matches.
for (gap, year, name) in sorted(data, reverse = True)[: 10]:
    print(f'{name} was not given for {gap} may years, the last time '
          f'between {year} and {year + gap}')
>>>
Levy was not given for 119 may years, the last time between 1887 and 2006
Izzie was not given for 115 may years, the last time between 1891 and 2006
Izma was not given for 108 may years, the last time between 1899 and 2007
Leannah was not given for 107 may years, the last time between 1889 and 1996
Auguste was not given for 106 may years, the last time between 1892 and 1998
Saidee was not given for 102 may years, the last time between 1893 and 1995
Caledonia was not given for 101 may years, the last time between 1900 and 2001
Enora was not given for 100 may years, the last time between 1910 and 2010
Sella was not given for 97 may years, the last time between 1916 and 2013
Nolah was not given for 97 may years, the last time between 1907 and 2004

4. Vigenère cipher

维吉尼亚密码（又译维热纳尔密码）是使用一系列凯撒密码组成密码字母表的加密算法，属于多表密码的一种简单形式。
课上代码在lecture4的文件夹下面，有很多值得学习的写法。
理解参考：
https://en.wikipedia.org/wiki/Vigen%C3%A8re_cipher
https://zh.wikipedia.org/wiki/%E7%BB%B4%E5%90%89%E5%B0%BC%E4%BA%9A%E5%AF%86%E7%A0%81

COMP9021 Principles of Programming WEEK5