To crack a Vigenère cipher whose key length is known to be 3, 4 or 5, I used frequency analysis to find candidate keys. For every candidate I computed the Euclidean distance between the observed letter frequencies and the “Relative frequencies of letters in the English language” table; the smallest distance identifies first the correct key length and then the correct key.
My code follows the steps below:
1. Preparation
// Complete contents of the input file; stays null until the file has been read.
public static string bigfile = null;

// Candidate key lengths to try.
public static int[] keylength = { 3, 4, 5 };

// Name of the file to analyse.
public static string filename = "sample.txt";

// Reference table: relative frequency of letters in English text.
// The entries are supplied through a collection initializer because
// statements such as normalfre.Add(...) are not allowed at class scope.
// Only the eight entries shown in this listing are included.
public static Dictionary<char, double> normalfre = new Dictionary<char, double>
{
    { 'E', 0.12072 },
    { 'T', 0.09056 },
    { 'A', 0.08167 },
    { 'O', 0.07507 },
    { 'I', 0.06966 },
    { 'N', 0.06749 },
    { 'S', 0.06327 },
    { 'H', 0.06094 },
};
First, I created a Dictionary called “normalfre” that holds the “Relative frequencies of letters in the English language” table from Wikipedia [http://en.wikipedia.org/wiki/Letter_frequency]. It is the reference against which the frequencies measured in the input text are compared.
The candidate key lengths stored in “keylength” are {3, 4, 5}, and “filename” is “sample.txt”.
2. Import file
// Reads the entire file at the given path and stores its text in bigfile.
static void readfile(string filename)
{
    string contents = File.ReadAllText(filename);
    bigfile = contents;
}
The function “ReadAllText” is used to read the whole file into a single string.
3. Find the key length
/// <summary>
/// Picks the most plausible key length among the candidates in keylength.
/// </summary>
/// <remarks>
/// For each candidate length the text in bigfile is split into that many
/// columns. Every column is frequency-analysed, and the smallest distance
/// reported by findKeyDistance for that column is recorded. The candidate
/// whose columns have the lowest average distance wins.
/// </remarks>
/// <returns>
/// The candidate key length with the lowest average distance. When several
/// candidates tie, the one appearing last in keylength is returned.
/// </returns>
/// <exception cref="InvalidOperationException">
/// Thrown when keylength contains no candidates.
/// </exception>
static int determinkeylength()
{
    if (keylength.Length == 0)
    {
        throw new InvalidOperationException("No candidate key lengths to evaluate.");
    }

    int finalkeylength = 0;
    double lowestAverage = double.PositiveInfinity;

    // Iterate over the candidates themselves rather than assuming there are
    // exactly three of them and that candidate j equals j + 3.
    foreach (int candidate in keylength)
    {
        // One string per key position: column i holds characters i, i + candidate, ...
        List<string> text = divideByKeylength(candidate, bigfile);

        double total = 0;
        for (int i = 0; i < candidate; i++)
        {
            Dictionary<char, double> frequences = frequencyAnalysis(text[i]);
            Dictionary<char, double> kd = findKeyDistance(frequences);

            // Only the best (smallest) distance of this column is needed here.
            total += kd.Values.Min();
        }

        double average = total / candidate;

        // "<=" keeps the later candidate on a tie. Tracking the minimum
        // directly also avoids searching for it again with a floating-point
        // equality comparison.
        if (average <= lowestAverage)
        {
            lowestAverage = average;
            finalkeylength = candidate;
        }
    }

    return finalkeylength;
}
4. Divide text by key length
/// <summary>
/// Splits a text into keylen interleaved columns.
/// </summary>
/// <param name="keylen">Number of columns to produce.</param>
/// <param name="originalText">Text to split.</param>
/// <returns>
/// A list of keylen strings; the string at index k contains the characters
/// at positions k, k + keylen, k + 2 * keylen, ... of originalText.
/// </returns>
static List<string> divideByKeylength(int keylen, string originalText)
{
    var columns = new StringBuilder[keylen];
    for (int slot = 0; slot < columns.Length; slot++)
    {
        columns[slot] = new StringBuilder();
    }

    // Deal the characters out round-robin, one column after another.
    int position = 0;
    foreach (char symbol in originalText)
    {
        columns[position % keylen].Append(symbol);
        position++;
    }

    return columns.Select(column => column.ToString()).ToList();
}
5. Frequency analysis for each divided text
/// <summary>
/// Builds a relative-frequency table for the characters of a string.
/// </summary>
/// <param name="dividedfile">Text whose characters are counted.</param>
/// <returns>
/// A dictionary mapping each distinct character to its number of occurrences
/// divided by the length of the text. Empty when the text is empty.
/// </returns>
static Dictionary<char, double> frequencyAnalysis(string dividedfile)
{
    // Count with integers first. Adding 1/length once per occurrence
    // accumulates rounding error (nine additions of 0.1 give
    // 0.8999999999999999 rather than 0.9).
    var counts = new Dictionary<char, int>();
    foreach (char symbol in dividedfile)
    {
        int seen;
        counts.TryGetValue(symbol, out seen); // seen stays 0 for a new character
        counts[symbol] = seen + 1;
    }

    double filelength = dividedfile.Length;
    var fretable = new Dictionary<char, double>(counts.Count);

    // Entries are copied in the order the counts were inserted, i.e. by
    // first occurrence in the text, like the table built character by character.
    foreach (KeyValuePair<char, int> entry in counts)
    {
        fretable[entry.Key] = entry.Value / filelength;
    }

    return fretable;
}
6. Calculate the Euclidean distance between our result and the normal English letter frequencies
// Builds a table mapping each character of dividedfile to its relative
// frequency: every occurrence of a character adds 1/filelength to its entry,
// where filelength is the length of dividedfile.
// NOTE(review): this listing is the same frequencyAnalysis code shown under
// step 5, not a distance computation. determinkeylength calls
// findKeyDistance(frequences), whose definition does not appear in this
// document; presumably that routine was meant to be shown here. Confirm with
// the author.
static Dictionary<char, double> frequencyAnalysis(string dividedfile) { Dictionary < char, double > fretable = new Dictionary < char, double > (); double filelength = dividedfile.Length; for (int i = 0; i < filelength; i++) { char key = dividedfile[i]; if (fretable.Keys.Contains(key)) fretable[key] = fretable[key] + 1/filelength; else fretable[key] = 1/filelength; } return fretable; }
7. The key is the candidate key with the minimum Euclidean distance