给定的字符串中有'*',表示在一行内,可以和以'w'开头,以'd'结尾的任意字符串相匹配。在一行中,对于第一个字符'w',同时有字符串"wwwd"以及"wwwdd"与之相匹配,根据上述第8条规则,应该匹配"wwwd"。以此类推得到"wwd"和"wd"。同样的规则用于第二行,得到"world"和"word"。
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// Fold an ASCII upper-case letter ('A'..'Z') to its lower-case counterpart.
// Any other character is returned unchanged.
char tolower(char s)
{
    const int is_upper = ('A' <= s && s <= 'Z');

    return is_upper ? (char)(s + ('a' - 'A')) : s;
}
// Return the index just past the single-character element that starts at
// regex[pos]: either one plain letter or a whole "[...]" box.
// Returns -1 when a '[' has no closing ']'.
static int element_end(const char regex[], int pos)
{
    if (regex[pos] != '[')
        return pos + 1;
    for (int k = pos + 1; regex[k] != '\0'; k++) {
        if (regex[k] == ']')
            return k + 1;
    }
    return -1;
}

// Return 1 when character c satisfies the element regex[pos..end-1], else 0.
// Comparison is case-insensitive. A box whose first member is '^' accepts
// every character that is NOT listed after the '^'.
static int element_accepts(const char regex[], int pos, int end, char c)
{
    if (c == '\0')
        return 0; // the end of the text never satisfies an element
    if (regex[pos] != '[')
        return tolower(regex[pos]) == tolower(c);

    int first = pos + 1;
    int last = end - 1; // index of the closing ']'
    int negated = (first < last && regex[first] == '^');
    int found = 0;

    if (negated)
        first++;
    for (int k = first; k < last && !found; k++)
        found = (tolower(regex[k]) == tolower(c));
    return negated ? !found : found;
}

// Return the index of the next '*' that lies outside any box, or of the
// terminating NUL if there is none; -1 when an unterminated '[' is met.
static int segment_end(const char regex[], int from)
{
    while (regex[from] != '\0' && regex[from] != '*') {
        from = element_end(regex, from);
        if (from < 0)
            return -1;
    }
    return from;
}

// Match the star-free run of elements regex[from..to-1] against the start of
// text. Returns the number of characters consumed, or -1 on a mismatch.
// The range must already have been validated by segment_end().
static int segment_match(const char regex[], int from, int to, const char text[])
{
    int used = 0;

    while (from < to) {
        int next = element_end(regex, from);

        if (!element_accepts(regex, from, next, text[used]))
            return -1; // also stops at the NUL, so text is never over-read
        used++;
        from = next;
    }
    return used;
}

// Decide whether the pattern regex[] matches scans[] starting exactly at
// position pos_scans. On success the matched text is copied into prints[]
// (NUL-terminated) and 1 is returned; otherwise 0 is returned and prints[]
// is left untouched.
//
// Pattern syntax (all letter comparisons are case-insensitive):
//   x        a plain character matches itself
//   [abc]    matches one character listed in the box
//   [^abc]   matches one character NOT listed in the box
//   *        matches any run of characters, possibly empty
//
// A '*' in the middle of the pattern takes as few characters as possible;
// a '*' at the very end of the pattern takes the whole rest of the text.
// Consecutive '*' behave like a single '*'.
// A pattern containing a '[' without a closing ']' never matches.
//
// prints[] must have room for the rest of scans[] from pos_scans plus the NUL.
int regex_match(char scans[], int pos_scans, char regex[], char prints[])
{
    const char *text = scans + pos_scans;
    int iter_regex = 0; // position of the scanner in regex[]
    int iter_scans = 0; // number of characters of text consumed so far
    int floating = 0;   // set right after a '*': the next segment may slide forward

    while (regex[iter_regex] != '\0') {
        if (regex[iter_regex] == '*') {
            if (regex[iter_regex + 1] == '\0') {
                // trailing '*': everything that is left belongs to the match
                iter_scans += (int)strlen(text + iter_scans);
                break;
            }
            floating = 1;
            iter_regex++;
            continue;
        }

        // The elements up to the next '*' form a fixed-length segment.
        int seg_end = segment_end(regex, iter_regex);
        if (seg_end < 0)
            return 0; // unterminated "[...": treat as "no match"

        // After a '*', look for the earliest place where the WHOLE segment
        // fits (not just its first element); otherwise it must fit right here.
        int used = segment_match(regex, iter_regex, seg_end, text + iter_scans);
        while (used < 0 && floating && text[iter_scans] != '\0') {
            iter_scans++;
            used = segment_match(regex, iter_regex, seg_end, text + iter_scans);
        }
        if (used < 0)
            return 0;

        iter_scans += used;
        iter_regex = seg_end;
        floating = 0;
    }

    for (int j = 0; j < iter_scans; j++)
        prints[j] = text[j];
    prints[iter_scans] = '\0';
    return 1;
}
// Read one pattern from standard input, then scan "string.in" line by line.
// For every line with at least one match, write to "string.out":
//     <line number>:<match>,<match>,...
// with one match attempt per starting position in the line.
// Exits with status 1 if either file cannot be opened or no pattern is read.
int main()
{
    // Large enough that an 80-character line plus its newline fits in a single
    // fgets() call. A longer line is still split and counted as several lines.
    enum { LINE_CAP = 256 };

    FILE *fin, *fout;
    char regex[21];
    char scans[LINE_CAP];
    char prints[LINE_CAP]; // a match is never longer than the line it came from
    int line = 0;
    int i;

    if ((fin = fopen("string.in", "r")) == NULL)
        exit(1);
    if ((fout = fopen("string.out", "w")) == NULL) {
        fclose(fin);
        exit(1);
    }

    // Bound the read to the buffer size and refuse to run without a pattern.
    if (scanf("%20s", regex) != 1) {
        fclose(fin);
        fclose(fout);
        exit(1);
    }

    while (fgets(scans, sizeof scans, fin) != NULL) {
        int flag = 1; // 1 until the first match of this line has been written

        line++;
        // fgets() keeps the line terminator; drop it so that '*' and "[^...]"
        // cannot swallow it and leak a newline into the output.
        scans[strcspn(scans, "\r\n")] = '\0';

        for (i = 0; scans[i] != '\0'; i++) {
            if (regex_match(scans, i, regex, prints)) {
                if (flag)
                    fprintf(fout, "%d:", line);
                else
                    fprintf(fout, ",");
                fprintf(fout, "%s", prints);
                flag = 0;
            }
        }
        if (!flag)
            fprintf(fout, "\n");
    }

    fclose(fin);
    fclose(fout);
    return 0;
}