代码复审

  对于结队编程抱到黎柱金同学的大腿让我轻松许多,结对伙伴的词频统计程序写的很好,现在我对大神代码做些个人意见的评价。

  伙伴的代码整体都很好,代码简洁,没冗余代码,而且用C#语言实现面向对象,层次化抽象清晰,任务非配合理,没有过多的类冗余属性,而且代码重用也应用很好,进一步使得代码简洁明了。

 1 using System;

 2 using System.Collections.Generic;

 3 using System.IO;

 4 using System.Linq;

 5 using System.Text;

 6 using System.Text.RegularExpressions;

 7 using System.Threading.Tasks;

 8 

 9 namespace WordFrequency

10 {

11     class Program

12     {

13         static void Main(string[] args)

14         {

15             TallyMode mode;

16             string path;

17             string outputFileName = "[email protected]";

18 

19             #region tally mode & path

20             switch (args.Count())

21             {

22                 case 0:

23                     Console.WriteLine("Please specify a directory!");

24                     return;

25                 case 1:

26                     mode = TallyMode.Normal;

27                     path = args[0];

28                     break;

29                 case 2:

30                     if (args[0] == "-e2")

31                         mode = TallyMode.E2;

32                     else if (args[0] == "-e3")

33                         mode = TallyMode.E3;

34                     else

35                     {

36                         Console.WriteLine("The argument must be -e2 or -e3. Scanning cancelled.");

37                         return;

38                     }

39                     path = args[1];

40                     break;

41                 default:

42                     Console.WriteLine("Arguments incorrect. Scanning cancelled.");

43                     return;

44             }

45             #endregion

46 

47             WordManager wordManager;

48             try

49             {

50                 wordManager = new WordManager(path, mode);

51             }

52             catch (IOException e)

53             {

54                 Console.WriteLine(e.Message);

55                 return;

56             }

57 

58             Console.WriteLine("Path: {0}", path);

59             Console.WriteLine("Start scanning...");

60             wordManager.ScanAndCount(new List<string>() { ".txt", ".cpp", ".h", ".cs" }, false);

61             Console.WriteLine("Complete!");

62 

63             Console.WriteLine(new string('-', Console.WindowWidth - 1));

64             var writer = new StreamWriter(outputFileName, false, Encoding.Default);

65             wordManager.Output(true, writer);

66             //wordManager.Output(true, Console.Out);

67             writer.Close();

68             Console.WriteLine("The results have been saved to \"{0}\"", outputFileName);

69         }

70     }

71 }

  伙伴将命令行输入可能出错进行判断,而且在控制台有人性化输出,让别人可以知道自己的输入问题所在,而且还进行异常处理,以防输入过程出现问题,使得程序更加稳定。在文件处理时也采用了异常处理,使得程序严谨正确性大大提高。

 

  1 using System;

  2 using System.Collections.Generic;

  3 using System.IO;

  4 using System.Linq;

  5 using System.Text;

  6 using System.Text.RegularExpressions;

  7 using System.Threading.Tasks;

  8 

  9 namespace WordFrequency

 10 {

 11     class WordManager

 12     {

 13         /* the list is sorted by words' lowercases, and it's not of the final order */

 14         /* in consideration of extended mode,it can also used for list of "word group"s */

 15         SortedList<string, WordInfo> WordList = new SortedList<string, WordInfo>();

 16 

 17         DirectoryInfo RootDirectory;

 18         TallyMode Mode;

 19 

 20         public WordManager(string path, TallyMode mode)

 21         {

 22             if (!Directory.Exists(path))

 23                 throw new IOException("The directory specified doesn't exist!");

 24 

 25             RootDirectory = new DirectoryInfo(path);

 26             Mode = mode;

 27         }

 28 

 29         /// <summary>

 30         /// 

 31         /// </summary>

 32         /// <param name="extensions">

 33         /// each extension should contains prefix dot, e.g., ".png"

 34         /// </param>

 35         public void ScanAndCount(IList<string> extensions, bool showLogs)

 36         {

 37             /* scan files and sub-directories recursively */

 38             var resultFileInfos =

 39                 from fileInfo in RootDirectory.EnumerateFiles("*", SearchOption.AllDirectories)

 40                 where ((from ext in extensions where ext.Equals(fileInfo.Extension, StringComparison.OrdinalIgnoreCase) select ext).Count() > 0)

 41                 select fileInfo;

 42 

 43             /* count words in each file */

 44             foreach (var fileInfo in resultFileInfos)

 45             {

 46                 if (showLogs)

 47                     Console.WriteLine("Scanning {0}", fileInfo.FullName);

 48                 CountWords(fileInfo);

 49             }

 50         }

 51 

 52         public void Output(bool sort, TextWriter writer)

 53         {

 54             var wordInfoList = WordList.Values.ToList();

 55 

 56             if (sort)

 57                 wordInfoList.Sort();

 58 

 59             if (Mode == TallyMode.Normal)

 60                 foreach (var wordInfo in wordInfoList)

 61                     writer.WriteLine("{0}: {1}", wordInfo.Word, wordInfo.Frequency);

 62             else

 63                 for (int i = 0; i < WordList.Count() && i < 10; i++)

 64                     writer.WriteLine("{0}: {1}", wordInfoList[i].Word, wordInfoList[i].Frequency);

 65         }

 66 

 67         /* count words in specified file and store them to WordList */

 68         private void CountWords(FileInfo fileInfo)

 69         {

 70             FileStream readStream = fileInfo.OpenRead();

 71             StreamReader reader = new StreamReader(readStream, Encoding.Default);

 72             string text = reader.ReadToEnd();

 73 

 74             int i = 0;

 75             while (i < text.Length)

 76             {

 77                 string word;

 78 

 79                 if ((word = CurrentWord(text, i)) == null)

 80                 {

 81                     i++;

 82                     continue;

 83                 }

 84 

 85                 i += word.Length;

 86 

 87                 string word1 = null;

 88                 if (Mode == TallyMode.E2 || Mode == TallyMode.E3)

 89                 {

 90                     if (i >= text.Length || text[i] != ' ' || (word1 = CurrentWord(text, i + 1)) == null)

 91                         continue;

 92                     word += ' ' + word1;

 93                 }

 94 

 95                 string word2 = null;

 96                 if (Mode == TallyMode.E3)

 97                 {

 98                     if (i + word1.Length + 1 >= text.Length || text[i + word1.Length + 1] != ' ' ||

 99                         (word2 = CurrentWord(text, i + word1.Length + 2)) == null)

100                         continue;

101                     word += ' ' + word2;

102                 }

103 

104                 /* add this word to the list */

105                 string lowerCase = word.ToLower();

106                 if (WordList.ContainsKey(lowerCase))

107                     WordList[lowerCase].Add(word);

108                 else

109                     WordList[lowerCase] = new WordInfo(word);

110 

111             }

112         }

113 

114         /// <summary>

115         /// if there is a legal string starting with s[i], returns it,

116         /// otherwise returns null

117         /// </summary>

118         /// <param name="s"></param>

119         /// <param name="i"></param>

120         /// <returns></returns>

121         private string CurrentWord(string s, int i)

122         {

123             if (i >= s.Length)

124                 return null;

125 

126             /* not alphabetic */

127             if (!IsAlphabetic(s[i]))

128                 return null;

129 

130             /* alphabetic, but not following a delemeter */

131             if (i > 0 && !IsDelimiter(s[i - 1]))

132                 return null;

133 

134             /* legal start, but less than 3 alphabetic successively */

135             if (i + 2 >= s.Length || !IsAlphabetic(s[i + 1]) || !IsAlphabetic(s[i + 2]))

136                 return null;

137 

138             /* build word */

139             StringBuilder wordBuilder = new StringBuilder(s.Substring(i, 3));

140             i += 3;

141             while (i < s.Length && IsAlphanumerical(s[i]))

142             {

143                 wordBuilder.Append(s[i]);

144                 i++;

145             }

146             string word = wordBuilder.ToString();

147 

148             return word;

149         }

150 

151         private bool IsAlphabetic(char c)

152         {

153             return c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z';

154         }

155 

156         private bool IsAlphanumerical(char c)

157         {

158             return IsAlphabetic(c) || c >= '0' && c <= '9';

159         }

160 

161         private bool IsDelimiter(char c)

162         {

163             return !IsAlphanumerical(c);

164         }

165     }

166 }

  代码中的类、函数与变量的命名体现了显式表达原则,让人一看就可以了解其作用与目的,可读性增添不少。

 

 1 using System;

 2 using System.Collections.Generic;

 3 using System.Linq;

 4 using System.Text;

 5 using System.Threading.Tasks;

 6 

 7 namespace WordFrequency

 8 {

 9     class WordInfo : IComparable<WordInfo>

10     {

11         public string Word { get; private set; }

12         public int Frequency { get; private set; }

13 

14         public WordInfo(string word)

15         {

16             Word = word;

17             Frequency = 1;

18         }

19 

20         public void Add(string newForm)

21         {

22             Frequency++;

23 

24             /* update the word */

25             Word = string.CompareOrdinal(Word, newForm) < 0 ? Word : newForm;

26         }

27 

28         public int CompareTo(WordInfo other)

29         {

30             /* frequency */

31             if (this.Frequency > other.Frequency)

32                 return -1;

33             else if (this.Frequency < other.Frequency)

34                 return 1;

35             /* lexical */

36             else

37                 return string.CompareOrdinal(this.Word, other.Word);

38         }

39     }

40 }

 

  伙伴的代码经过他本人的优化已经体现了正确性与高效性,但是也有些细节可以进一步提高。

  首先,代码中增添了些许注释,但是相对于代码来说还不是足够的,可以在再适当增添些,增加代码可读性。

 1 using System;

 2 using System.Collections.Generic;

 3 using System.Linq;

 4 using System.Text;

 5 using System.Threading.Tasks;

 6 

 7 namespace WordFrequency

 8 {

 9     enum TallyMode

10     {

11         Normal,

12         E2,

13         E3

14     }

15 }

  

  然后,他在模式二与模式三时的单词排序时,可以变换下算法,提高程序效率。他的代码在三种模式都是进行所有单词全部排序,而在模式二和模式三时,只需要输出TOP10频率的单词。因而,可以每次输出都遍历下全部单词,输出最高频率,此时的时间复杂度为O(10*N),比现在O(N*log(N))提高不少。

 

 

  最后,也是算法方面可以提高的问题,在WordManager.cs中,如下代码(在WorldManager.cs中的104行开始):

 

/* add this word to the list */

                string lowerCase = word.ToLower();

                if (WordList.ContainsKey(lowerCase))

                    WordList[lowerCase].Add(word);

                else

                    WordList[lowerCase] = new WordInfo(word);

  

  

  这里if判断语句containskey和wordlist[lowercase]执行了两次对lowercase的查找,如果改成一次查找就能缩短一半的时间。像这样:

 /* add this word to the list */

                string lowerCase = word.ToLower();

                WordInfo value;

                WordList.TryGetValue(lowerCase, out value);

                if (value != null)

                    value.Add(word);

                else

                    WordList[lowerCase] = new WordInfo(word);

 

 

 

  以上就是我对黎柱金同学的个人项目代码复审报告。

 

你可能感兴趣的:(代码)