[TOC]
15.1为文档中包含的单词生成一个列表
#include <iostream>
#include <set>
#include <string>
using namespace std;
/*
 * Reads whitespace-separated words from standard input and prints every
 * distinct word exactly once, one per line, in the set's ascending order.
 */
int main()
{
    // The element type must be spelled out: a bare `set` does not compile.
    std::set<std::string> S;
    std::string t;

    // insert() ignores words that are already present, so duplicates vanish.
    while (std::cin >> t)
        S.insert(t);

    for (std::set<std::string>::iterator j = S.begin(); j != S.end(); ++j)
        std::cout << *j << "\n";
    return 0;
}
对文档中每个单词出现的次数做统计
/* Copyright (C) 1999 Lucent Technologies */
/* From 'Programming Pearls' by Jon Bentley */
/* wordfreq.cpp -- List all words in input file, with counts */
#include
#include
使用自定义的散列表,对文档中每个单词出现的次数做统计
/* Copyright (C) 1999 Lucent Technologies */
/* From 'Programming Pearls' by Jon Bentley */
/* wordfreq.c -- list of words in file, with counts */
#include
#include
#include
typedef struct node *nodeptr;
typedef struct node {
char *word;
int count;
nodeptr next;
} node;
/* Number of hash buckets; hash() reduces its result modulo this value. */
#define NHASH 29989
/* Multiplier applied to the running hash value for each character. */
#define MULT 31
/* Bucket heads: bin[i] is the first node of the chain for hash value i. */
nodeptr bin[NHASH];
/*
 * Maps the NUL-terminated string p to a bucket index in [0, NHASH).
 * Each character is folded in as acc = MULT * acc + character, using
 * unsigned arithmetic, and the final value is reduced modulo NHASH.
 */
unsigned int hash(char *p)
{
    unsigned int acc = 0;

    while (*p != '\0') {
        acc = MULT * acc + *p;
        p++;
    }
    return acc % NHASH;
}
#define NODEGROUP 1000
int nodesleft = 0;
nodeptr freenode;
nodeptr nmalloc()
{ if (nodesleft == 0) {
freenode = malloc(NODEGROUP*sizeof(node));
nodesleft = NODEGROUP;
}
nodesleft--;
return freenode++;
}
/* String storage is carved out of chunks of at least CHARGROUP bytes. */
#define CHARGROUP 10000
int charsleft = 0;      /* unused bytes remaining in the current chunk */
char *freechar;         /* first unused byte of the current chunk */

/*
 * Returns a pointer to n bytes of uninitialized storage.
 * When the current chunk has fewer than n bytes left, a new chunk of
 * n + CHARGROUP bytes is allocated and whatever remained of the old chunk
 * is abandoned. Storage is never freed. Terminates the program with a
 * message on stderr if the chunk cannot be allocated, instead of returning
 * a pointer derived from NULL.
 */
char *smalloc(int n)
{
    if (charsleft < n) {
        freechar = (char *) malloc(n + CHARGROUP);
        if (freechar == NULL) {
            fprintf(stderr, "smalloc: out of memory\n");
            exit(EXIT_FAILURE);
        }
        charsleft = n + CHARGROUP;
    }
    charsleft -= n;
    freechar += n;
    return freechar - n;
}
/*
 * Records one occurrence of the word s.
 * If s is already in its hash chain the node's count is incremented;
 * otherwise a new node holding a private copy of s with count 1 is
 * pushed onto the front of the chain.
 */
void incword(char *s)
{
    int slot = hash(s);
    nodeptr cur = bin[slot];
    nodeptr fresh;

    while (cur != NULL) {
        if (strcmp(s, cur->word) == 0) {
            cur->count += 1;
            return;
        }
        cur = cur->next;
    }

    /* Not found: build a new node and make it the head of the chain. */
    fresh = nmalloc();
    fresh->count = 1;
    fresh->word = smalloc(strlen(s) + 1);   /* +1 for the terminating NUL */
    strcpy(fresh->word, s);
    fresh->next = bin[slot];
    bin[slot] = fresh;
}
/*
 * Reads whitespace-separated words from standard input, counts them in the
 * hash table, then prints every distinct word followed by its count, one
 * per line, in bucket order.
 */
int main()
{
    int i;
    nodeptr p;
    char buf[100];

    for (i = 0; i < NHASH; i++)
        bin[i] = NULL;

    /*
     * The field width keeps scanf from writing past buf: at most 99
     * characters plus the terminating NUL are stored. A longer run of
     * non-blank characters is consumed in several pieces rather than
     * overflowing the buffer.
     */
    while (scanf("%99s", buf) == 1)
        incword(buf);

    for (i = 0; i < NHASH; i++)
        for (p = bin[i]; p != NULL; p = p->next)
            printf("%s %d\n", p->word, p->count);
    return 0;
}
15.2 短语
/* Copyright (C) 1999 Lucent Technologies */
/* From 'Programming Pearls' by Jon Bentley */
/* longdup.c -- Print longest string duplicated M times */
#include
#include
#include
/*
 * Comparison function for qsort over an array of char * strings.
 * p and q each point to an array element (a pointer to a string); the
 * result is the strcmp ordering of the two strings they refer to.
 * The parameters are const void * because that is the comparator type
 * qsort requires; a caller may still pass char ** arguments directly.
 */
int pstrcmp(const void *p, const void *q)
{
    const char *const *lhs = (const char *const *) p;
    const char *const *rhs = (const char *const *) q;

    return strcmp(*lhs, *rhs);
}
/*
 * Returns the length of the common prefix of the strings p and q, i.e.
 * the number of leading characters that are equal before p ends or the
 * two strings first differ.
 */
int comlen(char *p, char *q)
{
    int matched;

    for (matched = 0; p[matched] != '\0' && p[matched] == q[matched]; matched++)
        ;
    return matched;
}
/* Distance between the two sorted suffixes main compares: a[i] and a[i+M]. */
#define M 1
/* Capacity of the text buffer c and of the suffix pointer array a. */
#define MAXN 5000000
/* c holds the input text; main sets a[i] to point at c[i] (the i-th suffix). */
char c[MAXN], *a[MAXN];
int main()
{ int i, ch, n = 0, maxi, maxlen = -1;
while ((ch = getchar()) != EOF) {
a[n] = &c[n];
c[n++] = ch;
}
c[n] = 0;
for(i = 0;i < n;i ++)
printf("a[%d] = %s",i,a[i]);
qsort(a, n, sizeof(char *), pstrcmp);
for(i = 0;i < n;i ++)
printf("a[%d] = %s\n",i,a[i]);
for (i = 0; i < n-M; i++)
if (comlen(a[i], a[i+M]) > maxlen) {
maxlen = comlen(a[i], a[i+M]);
maxi = i;
}
//printf("maxi = %d, maxlen = %d, %s\n", maxi, maxlen, a[maxi]);
printf("%.*s\n", maxlen, a[maxi]);
//printf("%s\n",a[maxi]);
return 0;
}
15.3 生成文本
/* Copyright (C) 2000 Lucent Technologies */
/* Modified from markov.c in 'Programming Pearls' by Jon Bentley */
/* markovlet.c -- generate letter-level random text from input text
Alg: Store text in an array on input
Scan complete text for each output character
(Randomly select one matching k-gram)
*/
#include
#include
/* Buffer holding the entire input text, NUL-terminated by main. */
char x[5000000];

/*
 * Generates letter-level random text from the input.
 * The text is read into x. For each output character the whole text is
 * scanned for occurrences of the current k-character window, one
 * occurrence is chosen uniformly at random, and the character that
 * follows it is printed. At most 2000 characters are produced; generation
 * stops early when the chosen occurrence is followed by the end of text.
 *
 * Input beyond sizeof(x) - 1 characters is ignored so the buffer and its
 * terminating NUL cannot be overrun. Input shorter than k characters
 * contains no k-gram at all, so nothing is generated.
 */
int main()
{
    int c, i, eqsofar, max, n = 0, k = 5;
    char *p, *nextp, *q;

    while (n < (int) sizeof(x) - 1 && (c = getchar()) != EOF)
        x[n++] = c;
    x[n] = 0;

    /* Without this guard x + n - k + 1 would not be a valid bound and
       nextp would be read without ever having been assigned. */
    if (n < k)
        return 0;

    p = x;
    nextp = x;
    srand(1);
    for (max = 2000; max > 0; max--) {
        eqsofar = 0;
        for (q = x; q < x + n - k + 1; q++) {
            for (i = 0; i < k && *(p+i) == *(q+i); i++)
                ;
            /* The eqsofar-th match replaces the choice with probability
               1/eqsofar, so every match is equally likely to be kept. */
            if (i == k)
                if (rand() % ++eqsofar == 0)
                    nextp = q;
        }
        c = *(nextp+k);
        if (c == 0)
            break;
        putchar(c);
        p = nextp+1;
    }
    return 0;
}
/* Copyright (C) 1999 Lucent Technologies */
/* From 'Programming Pearls' by Jon Bentley */
/* markov.c -- generate random text from input document */
#include
#include
#include
/* Storage for the input: main reads the words into it back to back, each
   followed by its terminating NUL. */
char inputchars[4300000];
/* word[i] points at the start of a word inside inputchars (in input order
   until main sorts the array). */
char *word[800000];
/* Number of words read. */
int nword = 0;
/* Number of consecutive words that wordncmp compares. */
int k = 2;
/*
 * Compares the k consecutive NUL-terminated words starting at p with the
 * k consecutive words starting at q.
 * Returns 0 when all k words, terminators included, are identical;
 * otherwise returns the difference of the first pair of characters that
 * differ.
 */
int wordncmp(char *p, char* q)
{
    int remaining = k;

    while (*p == *q) {
        if (*p == 0) {
            /* A matching terminator ends one word in both sequences. */
            remaining--;
            if (remaining == 0)
                return 0;
        }
        p++;
        q++;
    }
    return *p - *q;
}
/*
 * Comparison function for qsort over the word pointer array.
 * p and q each point to an array element (a char * to the start of a
 * word); the elements are ordered by wordncmp of the words they refer to.
 * The parameters are const void * because that is the comparator type
 * qsort requires; a caller may still pass char ** arguments directly.
 */
int sortcmp(const void *p, const void *q)
{
    return wordncmp(*(char *const *) p, *(char *const *) q);
}
/*
 * Advances p past n NUL terminators and returns the resulting pointer,
 * i.e. the start of the word that lies n words after the one at p.
 * Returns p unchanged when n <= 0.
 */
char *skip(char *p, int n)
{
    while (n > 0) {
        if (*p == 0)
            n--;
        p++;
    }
    return p;
}
/*
 * Generates word-level random text from the input.
 * Every input word is stored in inputchars and pointed to from word[];
 * the pointers are sorted by their leading k words. Starting from the
 * first k words of the input, each step binary-searches for the current
 * k-word phrase, picks one of its occurrences uniformly at random, prints
 * the word that follows that occurrence and shifts the phrase by one
 * word. At most 10000 words are generated; generation stops when the
 * chosen occurrence runs into the end of the input.
 *
 * Reading stops, silently truncating the input, as soon as either the
 * word[] array or the inputchars buffer (including room for the k
 * terminating NULs) would be exceeded. Empty input produces no output.
 */
int main()
{
    int i, wordsleft = 10000, l, m, u;
    char *phrase, *p;
    char fmt[32];
    size_t cap = sizeof(inputchars);
    size_t maxwords = sizeof(word) / sizeof(word[0]);
    size_t used, room;

    word[0] = inputchars;
    for (;;) {
        used = (size_t) (word[nword] - inputchars);
        /* word[nword+1] must exist, and the next word needs at least one
           character, its NUL, and the k sentinel NULs written below. */
        if ((size_t) nword + 1 >= maxwords || used + (size_t) k + 2 > cap)
            break;
        /* Longest word that still leaves room for its NUL and the k
           sentinels; passed to scanf as a field width. */
        room = cap - used - (size_t) k - 1;
        sprintf(fmt, "%%%lus", (unsigned long) room);
        if (scanf(fmt, word[nword]) != 1)
            break;
        word[nword+1] = word[nword] + strlen(word[nword]) + 1;
        nword++;
    }
    /* k empty words after the last real word mark the end of the text. */
    for (i = 0; i < k; i++)
        word[nword][i] = 0;
    if (nword == 0)
        return 0;

    /* word[i] is only set for i <= nword. */
    for (i = 0; i < k && i <= nword; i++)
        printf("%s\n", word[i]);
    qsort(word, nword, sizeof(word[0]), sortcmp);
    phrase = inputchars;
    for ( ; wordsleft > 0; wordsleft--) {
        /* Binary search for the first entry not less than phrase. */
        l = -1;
        u = nword;
        while (l+1 != u) {
            m = (l + u) / 2;
            if (wordncmp(word[m], phrase) < 0)
                l = m;
            else
                u = m;
        }
        /* Among the run of equal entries, the i-th one replaces the choice
           with probability 1/(i+1), giving a uniform pick. The scan stays
           inside the nword sorted entries. */
        p = NULL;
        for (i = 0; u + i < nword && wordncmp(phrase, word[u+i]) == 0; i++)
            if (rand() % (i+1) == 0)
                p = word[u+i];
        if (p == NULL)
            break;
        phrase = skip(p, 1);
        if (strlen(skip(phrase, k-1)) == 0)
            break;
        printf("%s\n", skip(phrase, k-1));
    }
    return 0;
}
/* Copyright (C) 1999 Lucent Technologies */
/* From 'Programming Pearls' by Jon Bentley */
/* markovhash.c -- generate random text, sped up with hash tables */
/* For storage efficiency (and also to minimize changes from markov.c),
the hash table is implemented in the integer array next.
If bin[i]=j, then word[j] is the first element in the list,
word[next[j]] is the next element, and so on.
*/
#include
#include
#include
/* Storage for the input: main reads the words into it back to back, each
   followed by its terminating NUL. */
char inputchars[4300000];
/* Capacity of the word[] and next[] arrays. */
#define MAXWORDS 800000
/* word[i] points at the start of the i-th input word inside inputchars. */
char *word[MAXWORDS];
/* Number of words read. */
int nword = 0;
/* Number of consecutive words hashed by hash() and compared by wordncmp(). */
int k = 2;
/* Chain links: next[j] is the index of the word after word[j] in the same
   bucket; main's lookup loop stops at a negative index. */
int next[MAXWORDS];
/* Number of hash buckets; hash() reduces its result modulo this value. */
#define NHASH 499979
/* bin[h] is the index of the first word in bucket h; main initializes
   every entry to -1 (empty). */
int bin[NHASH];
/* Multiplier applied to the running hash value for each character. */
#define MULT 31
/*
 * Hashes the k consecutive NUL-terminated words starting at ptr to a
 * bucket index in [0, NHASH).
 * Every byte, including each of the k terminators, is folded in as
 * h = MULT * h + byte. Bytes are read as unsigned char so the result does
 * not depend on whether plain char is signed.
 */
unsigned int hash(char *ptr)
{
    unsigned int h = 0;
    /* Explicit cast: char * does not convert implicitly to unsigned char *. */
    unsigned char *p = (unsigned char *) ptr;
    int n;

    for (n = k; n > 0; p++) {
        h = MULT * h + *p;
        if (*p == 0)
            n--;
    }
    return h % NHASH;
}
/*
 * Compares two sequences of k consecutive NUL-terminated words, one
 * starting at p and one at q.
 * Returns 0 if the sequences are identical through the k-th terminator,
 * otherwise the difference between the first characters that differ.
 */
int wordncmp(char *p, char* q)
{
    int wordsleft = k;

    for (;;) {
        if (*p != *q)
            return *p - *q;
        if (*p == 0) {
            /* Both sequences just finished the same word. */
            wordsleft--;
            if (wordsleft == 0)
                return 0;
        }
        p++;
        q++;
    }
}
/*
 * Comparison function in the form qsort requires, for arrays of char *
 * word pointers: p and q each point to an array element, and the elements
 * are ordered by wordncmp of the words they refer to.
 * The parameters are const void * to match qsort's comparator type; a
 * caller may still pass char ** arguments directly.
 */
int sortcmp(const void *p, const void *q)
{
    char *const *lhs = (char *const *) p;
    char *const *rhs = (char *const *) q;

    return wordncmp(*lhs, *rhs);
}
/*
 * Returns the position just after the n-th NUL terminator found at or
 * after p, which is the start of the word n words further on.
 * For n <= 0 the pointer is returned unchanged.
 */
char *skip(char *p, int n)
{
    while (n > 0) {
        char here = *p;

        p++;
        if (here == 0)
            n--;
    }
    return p;
}
int main()
{ int i, wordsleft = 10000, j;
char *phrase, *p;
word[0] = inputchars;
while (scanf("%s", word[nword]) != EOF) {
word[nword+1] = word[nword] + strlen(word[nword]) + 1;
nword++;
}
for (i = 0; i < k; i++)
word[nword][i] = 0;
for (i = 0; i < NHASH; i++)
bin[i] = -1;
for (i = 0; i <= nword - k; i++) { /* check */
j = hash(word[i]);
next[i] = bin[j];
bin[j] = i;
}
for (i = 0; i < k; i++)
printf("%s\n", word[i]);
phrase = inputchars;
for ( ; wordsleft > 0; wordsleft--) {
i = 0;
for (j = bin[hash(phrase)]; j >= 0; j = next[j])
if ((wordncmp(phrase, word[j]) == 0)
&& (rand() % (++i) == 0))
p = word[j];
phrase = skip(p, 1);
if (strlen(skip(phrase, k-1)) == 0)
break;
printf("%s\n", skip(phrase, k-1));
}
return 0;
}