第六章---字符串

无耻的copy source code。。。懒啊。。。

问题6.1 括号匹配问题

在某个字符串中包含有左括号、右括号与其他符号:规定(与常见的算术式子一样)任何一个左括号从内到外地与它右边、距离最近的右括号相匹配。请写一个程序,找出无法匹配的左括号与右括号,并且在输入列下方把它们标出来。

#include <iostream>
#include <iterator>
#include <algorithm>
#include <string>

using namespace std;

#define MAXLENGTH 100
#define YES 1
#define NO 0

// Stack of the positions of '(' characters that have not been closed yet.
int location[MAXLENGTH];

/*
 * Marks every unmatched parenthesis of `line`.
 *
 * line  : NUL-terminated input text.
 * error : output buffer, at least strlen(line) + 1 chars. It receives one
 *         marker per input character: ' ' for a matched or ordinary
 *         character, '?' under an unmatched ')', '$' under an unmatched '('.
 * sw    : set to YES when at least one parenthesis is unmatched, NO otherwise.
 *
 * The open-parenthesis stack lives in the global location[]; the function
 * does not check that fewer than MAXLENGTH '(' are pending at once.
 */
void par_count(char *line, char *error, int *sw)
{
	int open_top = -1;	// index of the top stack entry, -1 while the stack is empty
	int pos = 0;

	*sw = NO;
	while (line[pos] != '\0')
	{
		const char ch = line[pos];

		error[pos] = ' ';
		if (ch == '(')
		{
			location[++open_top] = pos;
		}
		else if (ch == ')')
		{
			if (open_top < 0)
			{
				// No pending '(' is available for this ')'.
				error[pos] = '?';
				*sw = YES;
			}
			else
			{
				--open_top;
			}
		}
		++pos;
	}
	error[pos] = '\0';

	if (open_top < 0)
		return;

	// Whatever is still on the stack never found its ')'.
	*sw = YES;
	for (int k = open_top; k >= 0; --k)
		error[location[k]] = '$';
}

// Demo driver: prints the sample line and, beneath it, the markers for
// unmatched parentheses; prints "yes" when everything is balanced.
int main()
{
	char line[] = "((ABCD(X)";
	const int size = sizeof line / sizeof *line;
	char error[size];

	int sw;

	par_count(line, error, &sw);

	if (sw == YES)
	{
		// `size` counts the terminating '\0'; stop at the terminator so the
		// NUL byte is not written to the output.
		for (int i = 0; line[i] != '\0'; i++)
		{
			cout << line[i] << " ";
		}
		cout << endl;
		for (int i = 0; error[i] != '\0'; i++)
		{
			cout << error[i] << " ";
		}
		cout << endl;
	}
	else
	{
		cout << "yes" << endl;
	}
	return 0;
}

问题6.2 转换成后继式写法

请写一个程序,读入一道正确的算术式,把它转译成反向波兰形式。为了方便起见,假设整道算术式在同一列,并且变量只有一个英文字母,不含常数(换言之,所有操作数都是一个字母的变量)。目前,只需要处理+, -, *, /, (, )即可,没有正负号。

#include <iostream>
#include <iterator>
#include <algorithm>
#include <string>
#include <stddef.h>
#include <ctype.h>

using namespace std;

// Codes stored alongside each stacked operator. main() compares them
// numerically (stack top >= incoming) to decide when to pop.
#define BOTTOM '\0'
#define EOL '\1'
#define LEFT_PAR '\2'
#define RIGHT_PAR '\3'
#define PLUS_MINUS '\4'
#define MUL_DIV '\5'

// Capacity of the two parallel arrays below.
#define MAX_DEPTH 100
// Identical redefinition of BOTTOM (same value as above).
#define BOTTOM '\0'
// stack[] holds the operator characters, code[] the matching codes;
// `top` indexes the current top entry in both. Slot 0 is the sentinel.
static char stack[MAX_DEPTH];
static char code[MAX_DEPTH];
static int top;

// Operator-stack interface, defined below.
void initial();
char stack_top();
void push(char, char);
char pop();

// Empties the operator stack; slot 0 keeps the BOTTOM sentinel code.
void initial()
{
	top = 0;
	code[0] = BOTTOM;
}

// Returns the code of the entry on top of the stack without removing it.
char stack_top()
{
	const char top_code = code[top];
	return top_code;
}

// Pushes operator character `oper` together with its code `opr_code`.
// Terminates the program when the stack is full.
void push(char oper, char opr_code)
{
	++top;
	if (top == MAX_DEPTH)
	{
		printf("\n*** ERROR *** Stack Overflow.");
		exit(1);
	}

	stack[top] = oper;
	code[top] = opr_code;
}

// Removes the top entry and returns its operator character.
// Popping the sentinel slot is an underflow and terminates the program.
char pop()
{
	if (top == 0)
	{
		// The original message said "Overflow" here, which misreports the cause.
		printf("\n*** ERROR *** Stack Underflow.");
		exit(1);
	}
	return stack[top--];
}

// Demo driver: converts the sample infix expression to postfix (reverse
// Polish) form and prints it. Operands are single letters; operators are
// + - * / and parentheses.
int main()
{
	const char *input = "a*(b+c)/d+k";
	char opr, t;

	initial();
	for (const char *pInput = input; ; pInput++)
	{
		// <ctype.h> classifiers require a value representable as unsigned char.
		const unsigned char ch = static_cast<unsigned char>(*pInput);

		if (isalpha(ch))
			printf("%c", *pInput);
		else if (*pInput == '(')
			push(*pInput, LEFT_PAR);
		else if (!isspace(ch))
		{
			switch (*pInput)
			{
			case '+':
			case '-':
				opr = PLUS_MINUS;
				break;
			case '*':
			case '/':
				opr = MUL_DIV;
				break;
			case ')':
				opr = RIGHT_PAR;
				break;
			case '\0':
				opr = EOL;
				break;
			default:
				printf("*** Unrecognizable char ***");
				exit(EXIT_FAILURE);
			}
			// Emit every stacked operator whose code is not lower than the
			// incoming one (equal codes pop, giving left associativity).
			while ((t = stack_top()) >= opr)
				printf("%c", pop());
			if (t == LEFT_PAR && opr == RIGHT_PAR)
				pop();	// discard the matching '('
			else if (opr == EOL)
			{
				// End of input is the normal way out, so report success.
				printf("\n");
				return EXIT_SUCCESS;
			}
			else
				push(*pInput, opr);
		}
	}
}



问题6.3 计算前置式写法

课本中都会提到如何算一道反向波兰形式的表达式的计算方式,但如何计算前置式波兰形式的表达式呢?为了简单起见,表达式在同一列,只有加、减、乘、除4个运算符,操作数只有一个数字符号,请写一个程序,接收一道前置式波兰形式的表达式,把结果求出来

#include <iostream>
#include <iterator>
#include <algorithm>
#include <string>
#include <stddef.h>
#include <ctype.h>

using namespace std;

#define LINE_SIZE 100
// Values of item::type.
#define STACK_BOTTOM 0
#define OPERAND 1
#define OPERATOR 2
// Capacity of the evaluation stack.
#define STACK_SIZE 100

// One stack entry: either a numeric operand or an operator character,
// with `type` telling which member of `store` is in use.
struct item
{
	union
	{
		double value;
		char oper;
	}store;
	int type;
};

// Evaluation stack; `top` indexes the current top entry, slot 0 is the
// STACK_BOTTOM sentinel set up by initial().
static struct item stack[STACK_SIZE];
static int top;

// Helpers defined after main().
int is_opr(char);
double compute(char, double, double);
void initial();
void push_opn(double);
void push_opr(char);
double pop_opn();
char pop_opr();
int stack_top();

// Demo driver: evaluates the sample prefix (Polish) expression, whose
// operands are single digits, and prints the result.
int main()
{
	const char *input = "-/*2+54+12/8+13";

	printf("Prefix Form Evaluator\n");
	initial();
	for (const char *p = input; *p != '\0'; p++)
	{
		if (is_opr(*p))
			push_opr(*p);
		else if (isdigit(static_cast<unsigned char>(*p)))
		{
			double opn2 = *p - '0';
			// While an operand sits on top, it is the left operand of the
			// operator just below it: fold the three into one value.
			while (stack_top() == OPERAND)
			{
				const double opn1 = pop_opn();
				const char opr = pop_opr();
				opn2 = compute(opr, opn1, opn2);
			}
			push_opn(opn2);
		}
	}

	// A malformed expression leaves an operator (or nothing) on top; do not
	// read it as a number.
	if (stack_top() != OPERAND)
	{
		printf("\n*** ERROR *** Malformed expression.");
		return 1;
	}
	printf("\n Result = %lf", pop_opn());
	return 0;
}

// Returns 1 when `opr` is one of the four arithmetic operators, 0 otherwise.
int is_opr(char opr)
{
	switch (opr)
	{
	case '+':
	case '-':
	case '*':
	case '/':
		return 1;
	default:
		return 0;
	}
}

// Applies binary operator `opr` to (opn1, opn2) and returns the result.
// An unrecognised operator yields 0.0 instead of an indeterminate value.
double compute(char opr, double opn1, double opn2)
{
	switch (opr)
	{
	case '+':
		return opn1 + opn2;
	case '-':
		return opn1 - opn2;
	case '*':
		return opn1 * opn2;
	case '/':
		return opn1 / opn2;
	default:
		return 0.0;
	}
}

// Empties the evaluation stack; slot 0 becomes the STACK_BOTTOM sentinel.
void initial()
{
	top = 0;
	stack[0].type = STACK_BOTTOM;
}

// Pushes numeric operand `data`. No capacity check is performed.
void push_opn(double data)
{
	++top;
	stack[top].store.value = data;
	stack[top].type = OPERAND;
}

// Pushes operator character `opr`. No capacity check is performed.
void push_opr(char opr)
{
	++top;
	stack[top].store.oper = opr;
	stack[top].type = OPERATOR;
}

// Removes the top entry and returns it read as an operand value.
// The caller is responsible for the top actually being an operand.
double pop_opn()
{
	const double value = stack[top].store.value;
	--top;
	return value;
}

// Removes the top entry and returns it read as an operator character.
// The caller is responsible for the top actually being an operator.
char pop_opr()
{
	const char symbol = stack[top].store.oper;
	--top;
	return symbol;
}

// Returns the type tag (STACK_BOTTOM, OPERAND or OPERATOR) of the top entry.
int stack_top()
{
	const int kind = stack[top].type;
	return kind;
}

问题6.4 Knuth-Morris-Pratt法寻找字符串(KMP算法)

关于KMP,最重要的是对模式串求next,具体code如下:

#include <iostream>
#include <algorithm>
#include <iterator>

using namespace std;

// Table filled by get_next(); entries 1..strlen(pattern) are meaningful.
int next[100];

/*
 * Computes the classic KMP `next` table in the 1-based textbook convention:
 * next[p] (1 <= p <= length) is the 1-based pattern position to retry after
 * a mismatch at position p, with 0 meaning "advance the text instead".
 *
 * input : NUL-terminated pattern (stored 0-based, so position p is input[p-1]).
 * next  : output table; entry 0 is unused.
 *
 * Only the first 99 positions fit in the table; longer patterns are
 * processed up to that limit so the array is never overrun.
 */
void get_next(const char *input, int (&next)[100])
{
	const int length = static_cast<int>(strlen(input));
	const int limit = length < 99 ? length : 99;
	int i = 1;	// 1-based position whose successor entry is computed next
	int j = 0;	// 1-based candidate position, 0 when no prefix can be reused

	next[1] = 0;
	while (i < limit)
	{
		// Positions are 1-based but the characters are stored 0-based,
		// hence the -1 on both indices.
		if (j == 0 || input[i - 1] == input[j - 1])
		{
			++i;
			++j;
			next[i] = j;
		}
		else
			j = next[j];
	}
}

void main()
{
	const char *input = "abcaabbcabcaabdab";
	get_next(input, next);

	for (int i = 0; i < strlen(input); i++)
	{
		cout << input[i] << " ";
	}
	cout << endl;
	for (int i = 1; i <= strlen(input); i++)
		cout << next[i] << " ";
	cout << endl;
}


虽然while循环的index是i,但由于j会回退,循环体执行的次数会大于strlen(input)次(均摊下来仍是线性的;不过上面的代码在循环条件里反复调用strlen,实际开销会变成O(n^2))。另外,对于串"aaaaaaaaaab"这样的模式串,按上面的next值进行匹配时会做很多不必要的重复比较,所以需要加以改进:

/*
 * Computes the improved KMP table ("nextval") in the 1-based textbook
 * convention: next[p] is the 1-based pattern position to retry after a
 * mismatch at position p, or 0 to advance the text. When the retry position
 * holds the same character as position p, the retry would fail too, so the
 * entry is shortened to that position's own entry.
 *
 * input : NUL-terminated pattern (stored 0-based, so position p is input[p-1]).
 * next  : output table; entries 1..strlen(input) are written, entry 0 is unused.
 *
 * Only the first 99 positions fit in the table; longer patterns are
 * processed up to that limit so the array is never overrun.
 */
void get_next(const char *input, int (&next)[100])
{
	const int length = static_cast<int>(strlen(input));
	const int limit = length < 99 ? length : 99;
	int i = 1;	// 1-based position whose successor entry is computed next
	int j = 0;	// 1-based candidate position, 0 when no prefix can be reused

	next[1] = 0;
	while (i < limit)
	{
		// Positions are 1-based but the characters are stored 0-based,
		// hence the -1 on every character access.
		if (j == 0 || input[i - 1] == input[j - 1])
		{
			++i;
			++j;
			if (input[i - 1] == input[j - 1])
				next[i] = next[j];
			else
				next[i] = j;
		}
		else
			j = next[j];
	}
}

KMP算法本来不复杂的,个人感觉网上很多人都没弄清楚就到处写,无论code还是其他的漏洞百出,混淆视听啊。。。

#include <iostream>
#include <algorithm>
#include <iterator>

using namespace std;

// Table filled by get_next() and read by KMP(); entries 1..strlen(pattern)
// are meaningful (1-based convention).
int next[100];

/*
 * Computes the improved KMP table ("nextval") in the 1-based textbook
 * convention: next[p] is the 1-based pattern position to retry after a
 * mismatch at position p, or 0 to advance the text.
 *
 * input : NUL-terminated pattern (stored 0-based, so position p is input[p-1]).
 * next  : output table; entry 0 is unused.
 *
 * Only the first 99 positions fit in the table; longer patterns are
 * processed up to that limit so the array is never overrun.
 */
void get_next(const char *input, int (&next)[100])
{
	const int length = static_cast<int>(strlen(input));
	const int limit = length < 99 ? length : 99;
	int i = 1;	// 1-based position whose successor entry is computed next
	int j = 0;	// 1-based candidate position, 0 when no prefix can be reused

	next[1] = 0;
	while (i < limit)
	{
		// Positions are 1-based but the characters are stored 0-based.
		if (j == 0 || input[i - 1] == input[j - 1])
		{
			++i;
			++j;
			if (input[i - 1] == input[j - 1])
				next[i] = next[j];
			else
				next[i] = j;
		}
		else
			j = next[j];
	}
}

/*
 * Searches text S for pattern T using the global `next` table.
 *
 * Precondition: get_next(T, next) has been called for this pattern.
 * Returns the 0-based index of the first occurrence, or -1 when T does not
 * occur in S. Patterns longer than 99 characters do not fit in the table
 * and are reported as not found.
 */
int KMP(char* S,char* T)
{
	const int s_len = static_cast<int>(strlen(S));
	const int t_len = static_cast<int>(strlen(T));
	int k = 0, j = 0;	// 0-based cursors into S and T

	if (t_len > 99)
		return -1;

	while (k < s_len && j < t_len)
	{
		if (S[k] == T[j])
		{
			++k;
			++j;
		}
		else
		{
			// The table is 1-based: the entry for 0-based position j is
			// next[j + 1], and its value is itself a 1-based position.
			// `::next` avoids any clash with std::next.
			const int fallback = ::next[j + 1];
			if (fallback == 0)
			{
				++k;
				j = 0;
			}
			else
				j = fallback - 1;
		}
	}
	if (j >= t_len)
		return k - j;
	else
		return -1;
}


void main()
{
	char lhs[] = "abcabcdfabcabgabdeg";
	char rhs[] = "abcab";

	get_next(rhs, next);
	for (int i = 1; i <= strlen(rhs); i++)
		cout << next[i] << " ";
	cout << endl;
	int result = KMP(lhs, rhs);
	if (result == -1)
		cout << "error" << endl;
	else
	{
		cout << "result = " << result << endl;
	}
}

KMP不一定是一种节省复杂度的算法,比如模式串里的所有字符均不相同,立马bug了~

#include <iostream>
#include <iterator>
#include <algorithm>
#include <string>
#include <stddef.h>
#include <ctype.h>

using namespace std;

/*
 * Builds the 0-based failure table of `pat`.
 *
 * fail[i] receives the index of the last character of the longest proper
 * prefix of pat[0..i] that is also a suffix of it, or -1 when there is none.
 * `fail` must have room for strlen(pat) entries (and at least one).
 * Does nothing when either pointer is NULL.
 */
void setup(char *pat, int *fail)
{
	if (pat == NULL || fail == NULL)
		return;

	const int pat_len = strlen(pat);

	fail[0] = -1;
	for (int pos = 1; pos < pat_len; ++pos)
	{
		// Walk down the chain of shorter borders until one can be extended
		// by pat[pos] or the chain is exhausted.
		int border = fail[pos - 1];
		while (border >= 0 && pat[border + 1] != pat[pos])
			border = fail[border];

		if (border < 0 && pat[border + 1] != pat[pos])
			fail[pos] = -1;
		else
			fail[pos] = border + 1;
	}
}

/*
 * Searches `text` for `pat` with the failure table produced by setup().
 *
 * fail : scratch table with room for strlen(pat) entries; filled here.
 * Returns the 0-based index of the first occurrence, or -1 when absent.
 */
int KMP(char *text, char *pat, int *fail)
{
	const int text_len = strlen(text);
	const int pat_len = strlen(pat);
	int ti = 0;	// cursor into text
	int pi = 0;	// cursor into pat

	setup(pat, fail);
	while (ti < text_len && pi < pat_len)
	{
		if (text[ti] == pat[pi])
		{
			++ti;
			++pi;
		}
		else if (pi > 0)
		{
			// Resume just after the longest border of what matched so far.
			pi = fail[pi - 1] + 1;
		}
		else
		{
			++ti;
		}
	}

	if (pi >= pat_len)
		return ti - pat_len;
	return -1;
}

// Demo driver: searches the sample text for the sample pattern and prints
// the match position, or "error" when there is none.
int main()
{
	char lhs[] = "abcdefghi";
	char rhs[] = "cde";
	int fail[10];	// one entry per pattern character is enough

	int result = KMP(lhs, rhs, fail);
	if (result == -1)
		cout << "error" << endl;
	else
	{
		cout << "result = " << result << endl;
	}
	return 0;
}

问题6.5 Boyer-Moore法寻找字符串

根据实验与理论的分析,KMP方法在一般情况下,并不见得会比传统的写法快多少,不过还有更好写而且平均会比KMP快的方法存在。这个方法由Boyer与Moore两人差不多与KMP方法同时发现,这个题目主要就是探讨Boyer-Moore方法。

这个比KMP容易理解多了啊~~

#include <iostream>
#include <algorithm>
#include <iterator>

using namespace std;

// Value returned by BM() when the pattern does not occur in the text.
#define NOT_FOUND -1

// get_jump fills a 256-entry shift table for a pattern; BM performs the search.
void get_jump(char*, int *);
int BM(char *, char *);

/*
 * Searches `text` for `pat`, comparing right to left and shifting by the
 * table built by get_jump() for the text character under the pattern's end.
 *
 * Returns the 0-based index of the first occurrence, or NOT_FOUND.
 */
int BM(char *text, char *pat)
{
	int jump_table[256];
	const int t_len = strlen(text);
	const int p_len = strlen(pat);

	get_jump(pat, jump_table);
	// i is the text index aligned with the last pattern character.
	for (int i = p_len - 1; i < t_len;)
	{
		int j = p_len - 1;
		int k = i;
		while (j >= 0 && text[k] == pat[j])
		{
			k--;
			j--;
		}
		if (j < 0)
			return k + 1;

		// Plain char may be signed; index the table with the byte value so
		// characters >= 0x80 do not produce a negative subscript.
		i += jump_table[static_cast<unsigned char>(text[i])];
	}

	return NOT_FOUND;
}

/*
 * Fills the 256-entry shift table for `pat`.
 *
 * For every byte value the entry is the distance from the byte's last
 * occurrence in pat (excluding pat's final character) to the end of pat;
 * bytes that do not occur get the full pattern length.
 */
void get_jump(char* pat, int *jump_table)
{
	const int length = strlen(pat);

	// Start at 0 so every one of the 256 entries is initialised.
	for (int c = 0; c < 256; c++)
		jump_table[c] = length;
	// Index by byte value: plain char may be signed and would otherwise
	// yield a negative subscript for characters >= 0x80.
	for (int i = 0; i < length - 1; i++)
		jump_table[static_cast<unsigned char>(pat[i])] = length - i - 1;
}

void main()
{
	char lhs[] = "abcdefghi";
	char rhs[] = "cde";

	int result = BM(lhs, rhs);
	if (result == -1)
		cout << "error" << endl;
	else
	{
		cout << "result = " << result << endl;
	}
}

问题6.6 所谓的h序列

题目是要写一个程序,接收一个字符串,辨认它是不是一个h序列,所谓的h序列是这样定义的:第一,0这个数字符号是一个h序列;第二,任何的h序列如果不是一个0的话,就是从1开始,后面跟着两个h序列。

#include <iostream>
#include <algorithm>
#include <iterator>

using namespace std;

// Result codes of h_sequence()/h_seq(). They must differ: with NO defined
// as 1 every input was reported as an h-sequence.
#define YES 1
#define NO 0

// Recursive recogniser, defined below.
int h_seq(char *);
// Index of the character currently examined by h_seq().
int cursor;

/*
 * Decides whether the whole string `x` is an h-sequence.
 * Returns YES when h_seq() accepts a sequence starting at index 0 and that
 * sequence ends exactly on the last character of x; NO otherwise.
 */
int h_sequence(char *x)
{
	const int length = strlen(x);

	cursor = 0;
	if (h_seq(x) != YES)
		return NO;

	// h_seq leaves the cursor ON the final character it accepted.
	return (cursor == length - 1) ? YES : NO;
}

/*
 * Recognises one h-sequence starting at x[cursor].
 * '0' is an h-sequence; '1' must be followed by two h-sequences.
 * On success the global cursor is left on the last character consumed.
 */
int h_seq(char *x)
{
	const char symbol = x[cursor];

	if (symbol == '0')
		return YES;
	if (symbol != '1')
		return NO;

	// First sub-sequence after the leading '1'.
	cursor++;
	if (h_seq(x) != YES)
		return NO;

	// Second sub-sequence.
	cursor++;
	return (h_seq(x) == YES) ? YES : NO;
}

/*
int h_sequence(char *x)
{
	int length = strlen(x);
	int count;
	int i;

	for (count = 1, i = 0; count != 0 && i < length; i++)
	{
		switch (x[i])
		{
		case '0':
			count--;
			break;
		case '1':
			count++;
			break;
		default:
			return NO;
		}
	}
	return count == 0 && i >= length;
}*/

// Demo driver: tests the sample string and prints the verdict.
int main()
{
	// A writable array: binding a string literal to char* is not valid C++.
	char x[] = "1000";
	int result = h_sequence(x);
	cout << "result = " << result << endl;
	return 0;
}

问题6.7 寻找部分序列

如果s是一个字符串,把其中(在任何位置)的符号去掉,留下来的内容是s的一个子序列。比如说,如果s的内容是"abcdefg",去掉b、d、f,留下"aceg";去掉f、g,留下"abcde";去掉a、b、c、e、g得到"df"。于是"aceg"、"abcde"与"df"都是原来字符串"abcdefg"的子序列,或者说是部分序列。请写一个函数,接受字符串text[]和pat[],看看pat[]是否为text[]的子序列,并且把pat[]在text[]中各符号的位置记录下来。

思路:题目简单,DP无关

#include <iostream>
#include <algorithm>
#include <iterator>

using namespace std;

#define FOUND 1
#define NOT_FOUND 0

/*
 * Tests whether `pat` is a subsequence of `text`.
 *
 * loc : output array with room for strlen(pat) entries. On success loc[j]
 *       is the index in text matched to pat[j] (leftmost possible match).
 *       On failure the entries written so far are not meaningful.
 * Returns FOUND or NOT_FOUND. An empty pattern is always FOUND.
 */
int subsequence(char *text, char *pat, int *loc)
{
	const int t_len = strlen(text);
	const int p_len = strlen(pat);

	if (p_len > t_len)
		return NOT_FOUND;

	int i = 0;	// next unused position in text
	for (int j = 0; j < p_len; j++)
	{
		while (i < t_len && text[i] != pat[j])
			i++;
		if (i >= t_len)
			return NOT_FOUND;

		// Consume the matched character so it cannot be reused for the
		// next pattern character (e.g. "aa" must not match inside "ab").
		loc[j] = i++;
	}

	return FOUND;
}

void main()
{
	char *lhs = "abcdefg";
	char *rhs = "aceg";
	
	const int len = strlen(rhs);
	int *loc = new int[len];
	subsequence(lhs, rhs, loc);
	for (int i = 0; i < len; i++)
		cout << loc[i] << " ";
	cout << endl;
}

问题6.8 最长重复部分序列

如果t与p是两个字符串,把p中的每一个符号重复写i次,就得到一个新字符串pi,pi是t的子序列吗?请写一个程序,找出最大的、使pi仍是t的子序列的i;如果p根本就不是t的子序列,则程序返回0。

思路:利用二分法,总感觉书上的code有问题,在这里改了又改。。。

#include <iostream>
#include <algorithm>
#include <iterator>

using namespace std;

#define FOUND 1
#define NOT_FOUND 0

int subsequence(char *text, char *pat, int number);

/*
 * Returns the largest i such that the string obtained by repeating every
 * character of `pat` i times is a subsequence of `text`.
 * Returns 0 when pat is not a subsequence of text at all, or when pat is empty.
 *
 * Feasibility is monotone in i (dropping one copy of each character keeps a
 * subsequence), so the answer is found by binary search on 1..t_len/p_len.
 */
int max_repetition(char *text, char *pat)
{
	const int t_len = strlen(text);
	const int p_len = strlen(pat);

	// An empty pattern would divide by zero below and has no meaningful count.
	if (p_len == 0)
		return 0;
	if (subsequence(text, pat, 1) == NOT_FOUND)
		return 0;

	int low = 1;			// known to be feasible
	int high = t_len / p_len;	// no larger count can fit in text
	while (low < high)
	{
		// Round up so the range always shrinks when low is kept.
		const int mid = low + (high - low + 1) / 2;
		if (subsequence(text, pat, mid) == FOUND)
			low = mid;
		else
			high = mid - 1;
	}

	return low;
}

/*
 * Tests whether `pat` with every character repeated `number` times is a
 * subsequence of `text`. Returns FOUND or NOT_FOUND.
 *
 * Greedy scan: for each pattern character take the earliest `number`
 * occurrences after the previously used position, skipping anything else.
 */
int subsequence(char *text, char *pat, int number)
{
	const int t_len = strlen(text);
	const int p_len = strlen(pat);

	if (p_len > t_len)
		return NOT_FOUND;

	int i = 0;	// next unused position in text
	for (int j = 0; j < p_len; j++)
	{
		int needed = number;
		for (; needed > 0 && i < t_len; i++)
		{
			if (text[i] == pat[j])
				needed--;
		}
		if (needed > 0)
			return NOT_FOUND;
	}

	return FOUND;
}

// Demo driver: prints the largest repetition count for the sample strings.
int main()
{
	// Writable arrays: binding a string literal to char* is not valid C++.
	char lhs[] = "aaabbbcccd";
	char rhs[] = "abc";

	int result = max_repetition(lhs, rhs);
	cout << "result = " << result << endl;
	return 0;
}

问题6.9 最长共同部分序列

如果A=a1a2...am是一个长度为m的字符串,把其中的若干(可能是0个,也可能是m个)个符号去掉,而得到一个新字符串,这个新字符串就叫做A的部分序列。例如,若A=abc0123,那么b02,abc123,b3,c,abc0123,ab12,。。。都是A的部分序列。

假设给了两个字符串A和B,长度分别是m和n,那么A与B就含有若干共同的部分序列,至少虚字符串(或者说是空字符串)就是一个共同部分序列。所谓C是A与B的共同部分序列,指的是C是A的部分序列,C也是B的部分序列。请写一个程序,把A与B的共同部分序列中最长的一个找出来。

这个问题一般都叫做最长共同部分序列问题,简称LCS。

思路:典型的DP问题,代码以前写过了,copy~

#include <iostream>
#include <algorithm>
#include <iterator>

using namespace std;

/*
 * Computes one longest common subsequence of `a` and `b`.
 *
 * result : output buffer with room for min(strlen(a), strlen(b)) + 1 chars;
 *          receives the NUL-terminated subsequence.
 *
 * Side effect: prints the two lengths, the DP table (rows 1..m, columns
 * 1..n) and the LCS length to standard output.
 * If the table cannot be allocated, result is set to the empty string.
 */
void longest_common_subsequence(char *a, char *b, char *result)
{
	const int m = strlen(a);
	const int n = strlen(b);
	std::cout << "m = " << m << std::endl;
	std::cout << "n = " << n << std::endl;

	// d[i][j] = LCS length of a[0..i) and b[0..j). One contiguous
	// (m+1) x (n+1) block of ints plus an array of m+1 ROW POINTERS: the
	// pointer array must be sized with sizeof(int *), and the block with
	// n+1 columns, or rows run past the allocation.
	int **d = static_cast<int **>(malloc(sizeof(int *) * (m + 1)));
	if (d == NULL)
	{
		result[0] = 0;
		return;
	}
	d[0] = static_cast<int *>(malloc(sizeof(int) * (m + 1) * (n + 1)));
	if (d[0] == NULL)
	{
		free(d);
		result[0] = 0;
		return;
	}
	for (int i = 1; i <= m; i++)
		d[i] = d[i - 1] + n + 1;

	// An empty prefix has an empty LCS with anything.
	for (int i = 0; i <= m; i++)
		d[i][0] = 0;
	for (int j = 1; j <= n; j++)
		d[0][j] = 0;

	for (int i = 1; i <= m; i++)
	{
		for (int j = 1; j <= n; j++)
		{
			if (a[i - 1] == b[j - 1])
				d[i][j] = d[i - 1][j - 1] + 1;
			else if (d[i][j - 1] > d[i - 1][j])
				d[i][j] = d[i][j - 1];
			else
				d[i][j] = d[i - 1][j];
		}
	}

	for (int i = 1; i <= m; i++)
	{
		for (int j = 1; j <= n; j++)
		{
			std::cout << d[i][j] << " ";
		}
		std::cout << std::endl;
	}

	int count = d[m][n];
	std::cout << "count = " << count << std::endl;
	result[count] = 0;

	// Walk back from the bottom-right corner; a cell whose value comes from
	// neither the row above nor the column to the left marks a kept character.
	for (int i = m, j = n; (i != 0) && (j != 0);)
	{
		if (d[i][j] == d[i - 1][j])
			i--;
		else if (d[i][j] == d[i][j - 1])
			j--;
		else
		{
			result[--count] = a[i - 1];
			i--;
			j--;
		}
	}
	free(d[0]);
	free(d);
}

// Demo driver: prints a longest common subsequence of the two sample strings.
int main()
{
	char array1[] = {'A', 'B', 'C', 'B', 'D', 'A', 'B', '\0'};
	char array2[] = {'B', 'D', 'C', 'A', 'B', 'A', '\0'};
	char result[100];

	longest_common_subsequence(array1, array2, result);

	cout << "result = " << result << endl;
	return 0;
}

问题6.10 字符串编修

已知两个字符串s与t,要研究如何把字符串s经由一连串修改后变成t。能够使用的就是插入一个符号,以及删除一个符号。把某个符号换成另一个,就可以通过先把它删除再原地插入所需的符号来完成。请写一个程序,接收s与t,找出如何才能够在最少步骤之下把s改成t。

思路:

1. 插入字符,1次操作

2. 删除字符,1次操作

3. 修改字符,2次操作

code如下:

#include <iostream>
#include <algorithm>
#include <iterator>

using namespace std;

// Cost of each elementary operation; replacing a character counts as a
// deletion followed by an insertion.
#define INSERT_COST 1
#define DELETE_COST 1
#define EXCHANGE_COST 2

// Swaps a and b through a variable named `t` that must exist at the call site.
#define SWAP(a, b) { t = a; a = b; b = t; }

// Reverses the first n elements of x in place.
void reverse(int *x, int n)
{
	int i, j, t;
	for (i = 0, j = n - 1; i < j; i++, j--)
		SWAP(x[i], x[j]);
}

// Smallest of three values.
#define MIN(x, y, z) ((x) <= (y) ? \
	((x) <= (z) ? (x) : (z)) : \
	((y) <= (z) ? (y) : (z)))

/*
 * Computes the cheapest way to turn `source` into `target` using single
 * character insertions and deletions.
 *
 * s, t  : output arrays. Entry k of each receives the index (in source and
 *         target respectively) of the k-th character pair that is kept
 *         unchanged, in increasing order. Each needs room for
 *         min(strlen(source), strlen(target)) entries. The number of pairs
 *         is not reported by this interface.
 * count : receives the minimum total cost, or -1 when the work table
 *         cannot be allocated.
 */
void edit(char *source , char *target, int *s, int *t, int *count)
{
	const int s_len = strlen(source);
	const int t_len = strlen(target);
	int insert_t, delete_s, exchange;
	int i, j, no;

	// cost[i][j] = cheapest edit of source[0..i) into target[0..j).
	// The row-pointer array holds POINTERS, so it is sized with sizeof(int *).
	int **cost = static_cast<int **>(malloc(sizeof(int *) * (s_len + 1)));
	if (cost == NULL)
	{
		*count = -1;
		return;
	}
	cost[0] = static_cast<int *>(malloc(sizeof(int) * (s_len + 1) * (t_len + 1)));
	if (cost[0] == NULL)
	{
		free(cost);
		*count = -1;
		return;
	}
	for (i = 1; i <= s_len; i++)
		cost[i] = cost[i - 1] + t_len + 1;

	// Borders: delete every source character / insert every target character.
	cost[0][0] = 0;
	for (i = 1; i <= s_len; i++)
		cost[i][0] = cost[i - 1][0] + DELETE_COST;
	for (j = 1; j <= t_len; j++)
		cost[0][j] = cost[0][j - 1] + INSERT_COST;

	for (i = 0; i < s_len; i++)
	{
		for (j = 0 ; j < t_len; j++)
		{
			if (source[i] == target[j])
				cost[i + 1][j + 1] = cost[i][j];
			else
			{
				insert_t = cost[i + 1][j] + INSERT_COST;
				delete_s = cost[i][j + 1] + DELETE_COST;
				exchange = cost[i][j] + EXCHANGE_COST;
				cost[i + 1][j + 1] = MIN(insert_t, delete_s, exchange);
			}
		}
	}

	// Walk back from the corner. Moving up drops a source character, moving
	// left adds a target character; a diagonal step records a kept pair.
	for (i = s_len, j = t_len, no = 0; i != 0 && j != 0;)
	{
		if (cost[i][j] == cost[i - 1][j] + DELETE_COST)
			i--;
		else if (cost[i][j] == cost[i][j - 1] + INSERT_COST)
			j--;
		else
		{
			s[no] = i - 1;
			t[no] = j - 1;
			no++, i--, j--;
		}
	}

	// Pairs were collected back to front.
	reverse(s, no);
	reverse(t, no);
	*count = cost[s_len][t_len];
	free(cost[0]);
	free(cost);
}

// Demo driver: prints the minimum edit cost between the two sample strings.
int main()
{
	// Writable arrays: binding a string literal to char* is not valid C++.
	char source[] = "abcdef";
	char target[] = "xbyzek";
	int *s = new int[10];
	int *t = new int[10];

	int count;

	edit(source, target, s, t, &count);

	cout << "count = " << count << endl;

	delete[] s;
	delete[] t;
	return 0;
}

问题6.11 产生无连续重复部分的字符串

请写一个程序,产生由1、2、3这3个数字符号所构成、长度为n的字符串,并且在字符串中对于任何一个部分字符串而言,都不会有相邻的、完全相同的部分字符串。

你可能感兴趣的:(c,c,input,div)