To crack a Vigenère cipher whose key length is known to be 3, 4, or 5, I used frequency analysis to find candidate keys. For every candidate key I calculated the Euclidean distance to the “Relative frequencies of letters in the English language”, and compared these distances to find first the correct key length and then the correct key.
My code follows the steps below:
1. Preparation
// Complete contents of the input file; null until readfile() has assigned it.
public static string bigfile = null;
// Candidate key lengths to try.
public static int[] keylength = { 3, 4, 5 };
// Name of the input file.
public static string filename = "sample.txt";
// Relative frequencies of the eight listed letters in English text
// (values from http://en.wikipedia.org/wiki/Letter_frequency).
public static Dictionary<char, double> normalfre = new Dictionary<char, double>
{
    { 'E', 0.12702 }, // was 0.12072: digits transposed; the cited table lists 12.702%
    { 'T', 0.09056 },
    { 'A', 0.08167 },
    { 'O', 0.07507 },
    { 'I', 0.06966 },
    { 'N', 0.06749 },
    { 'S', 0.06327 },
    { 'H', 0.06094 }
};
First, I created a Dictionary called “normalfre” that holds the “Relative frequencies of letters in the English language” found on the internet [http://en.wikipedia.org/wiki/Letter_frequency]. It is compared against the letter frequencies measured from the original text.
The candidate key lengths in “keylength” are {3,4,5}, and “filename” is “sample.txt”.
2. Import file
/// <summary>
/// Reads the whole file named by <paramref name="filename"/> and stores its
/// text in the static field <c>bigfile</c>.
/// </summary>
/// <param name="filename">Path of the file to read.</param>
static void readfile(string filename)
{
    string contents = File.ReadAllText(filename);
    bigfile = contents;
}
The function "ReadAllText" is used to read the original text into a string.
3. Find the key length
/// <summary>
/// Picks the most likely key length among the candidates in <c>keylength</c>.
/// For each candidate, <c>bigfile</c> is divided with <c>divideByKeylength</c>,
/// every part is run through <c>frequencyAnalysis</c> and <c>findKeyDistance</c>,
/// and the smallest distance of each part is averaged over the parts.
/// </summary>
/// <returns>
/// The candidate key length with the smallest average distance. When several
/// candidates tie, the one that appears later in <c>keylength</c> wins.
/// Returns 0 if <c>keylength</c> is empty.
/// </returns>
static int determinkeylength() {
    int finalkeylength = 0;
    double bestAverage = double.PositiveInfinity;
    // Iterate over every configured candidate instead of a hard-coded count of 3.
    for (int j = 0; j < keylength.Length; j++)
    {
        int candidateLength = keylength[j];
        // One string per key position.
        List<string> text = divideByKeylength(candidateLength, bigfile);
        List<double> distances = new List<double>();
        for (int i = 0; i < candidateLength; i++)
        {
            Dictionary<char, double> frequences = frequencyAnalysis(text[i]);
            // findKeyDistance is defined elsewhere; presumably it maps each candidate
            // key character to its distance from the reference frequencies - confirm.
            Dictionary<char, double> kd = findKeyDistance(frequences);
            distances.Add(kd.Values.Min());
        }
        double average = distances.Sum() / candidateLength;
        // System.Console.WriteLine("Average dis:{0}", average);
        // "<=" keeps the later candidate on ties, matching the previous LastOrDefault lookup.
        if (average <= bestAverage)
        {
            bestAverage = average;
            // Record the actual candidate length rather than assuming it equals j + 3.
            finalkeylength = candidateLength;
        }
    }
    //System.Console.WriteLine(finalkeylength);
    return finalkeylength;
}
}
4. Divide text by key length
/// <summary>
/// Splits <paramref name="originalText"/> into <paramref name="keylen"/> strings,
/// where string <c>k</c> holds the characters at indices <c>k</c>, <c>k + keylen</c>,
/// <c>k + 2 * keylen</c>, and so on, in their original order.
/// </summary>
/// <param name="keylen">Number of parts to produce; must be at least 1.</param>
/// <param name="originalText">Text to divide; must not be null.</param>
/// <returns>A list of exactly <paramref name="keylen"/> strings.</returns>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="keylen"/> is less than 1.</exception>
/// <exception cref="ArgumentNullException"><paramref name="originalText"/> is null.</exception>
static List<string> divideByKeylength(int keylen, string originalText) {
    // Fail with a clear message instead of a DivideByZero/Overflow/NullReference further down.
    if (keylen < 1)
        throw new ArgumentOutOfRangeException("keylen", "Key length must be at least 1.");
    if (originalText == null)
        throw new ArgumentNullException("originalText");

    StringBuilder[] sb = new StringBuilder[keylen];
    // Each part receives roughly Length / keylen characters; presize to avoid regrowth.
    int capacity = originalText.Length / keylen + 1;
    for (int i = 0; i < keylen; i++)
        sb[i] = new StringBuilder(capacity);

    // Character i belongs to key position i mod keylen.
    for (int i = 0; i < originalText.Length; i++)
        sb[i % keylen].Append(originalText[i]);

    List<string> dividedfile = new List<string>(keylen);
    foreach (StringBuilder item in sb)
        dividedfile.Add(item.ToString());
    return dividedfile;
}
5. Frequency analysis for each divided text
/// <summary>
/// Builds a relative-frequency table for the characters of <paramref name="dividedfile"/>:
/// each distinct character maps to (number of occurrences) / (total length).
/// </summary>
/// <param name="dividedfile">Text to analyse; every character is counted as-is.</param>
/// <returns>
/// A dictionary from character to its relative frequency; empty when the
/// input string is empty.
/// </returns>
static Dictionary<char, double> frequencyAnalysis(string dividedfile) {
    Dictionary<char, double> fretable = new Dictionary<char, double>();
    // Contribution of a single occurrence; not used when the string is empty.
    double step = 1 / (double)dividedfile.Length;
    foreach (char symbol in dividedfile)
    {
        // TryGetValue leaves 'current' at 0 for an unseen character, so a single
        // lookup handles both the first and the repeated occurrence.
        double current;
        fretable.TryGetValue(symbol, out current);
        fretable[symbol] = current + step;
    }
    return fretable;
}
6. Calculate the Euclidean distance between our result and the normal English letter frequencies
// NOTE(review): this listing is identical to the frequencyAnalysis listing in
// section 5; the findKeyDistance function that this section's heading describes
// appears to be missing from the write-up - confirm and supply it.
/// <summary>
/// Builds a relative-frequency table for the characters of <paramref name="dividedfile"/>:
/// each distinct character maps to (number of occurrences) / (total length).
/// </summary>
/// <param name="dividedfile">Text to analyse; every character is counted as-is.</param>
/// <returns>
/// A dictionary from character to its relative frequency; empty when the
/// input string is empty.
/// </returns>
static Dictionary<char, double> frequencyAnalysis(string dividedfile) {
    Dictionary<char, double> fretable = new Dictionary<char, double>();
    // Contribution of a single occurrence; not used when the string is empty.
    double step = 1 / (double)dividedfile.Length;
    foreach (char symbol in dividedfile)
    {
        // TryGetValue leaves 'current' at 0 for an unseen character, so a single
        // lookup handles both the first and the repeated occurrence.
        double current;
        fretable.TryGetValue(symbol, out current);
        fretable[symbol] = current + step;
    }
    return fretable;
}
7. The key is the candidate key with the minimum Euclidean distance