Pearls15

[TOC]

15.1为文档中包含的单词生成一个列表

#include <iostream>
#include <set>
#include <string>
using namespace std;

/*
 * Read whitespace-separated words from standard input and print each
 * distinct word once, one per line, in the set's sorted order.
 */
int main()
{
    set<string> S;                  // distinct words seen so far
    string t;                       // word currently being read
    set<string>::iterator j;

    while (cin >> t)
        S.insert(t);                // inserting a duplicate is a no-op
    for (j = S.begin(); j != S.end(); ++j)
        cout << *j << "\n";
    return 0;
}

对文档中每个单词出现的次数做统计

/* Copyright (C) 1999 Lucent Technologies */
/* From 'Programming Pearls' by Jon Bentley */

/* wordfreq.cpp -- List all words in input file, with counts */

#include <iostream>
#include <map>
#include <string>
using namespace std;

/*
 * Read whitespace-separated words from standard input and print every
 * distinct word followed by the number of times it occurred, one pair
 * per line, in the map's sorted key order.
 */
int main()
{
    map<string, int> M;             // word -> occurrence count
    map<string, int>::iterator j;
    string t;                       // word currently being read

    while (cin >> t)
        M[t]++;                     // operator[] creates a zero count on first use
    for (j = M.begin(); j != M.end(); ++j)
        cout << j->first << " " << j->second << "\n";
    return 0;
}

使用自定义的散列表，对文档中每个单词出现的次数做统计

/* Copyright (C) 1999 Lucent Technologies */
/* From 'Programming Pearls' by Jon Bentley */

/* wordfreq.c -- list of words in file, with counts */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* One hash-table entry: a word, its occurrence count, and the chain link. */
typedef struct node node;
typedef node *nodeptr;
struct node {
    char *word;     /* NUL-terminated word text */
    int count;      /* number of times the word has been seen */
    nodeptr next;   /* next entry in the same bucket, or NULL */
};

#define NHASH 29989     /* number of buckets in the table */
#define MULT 31         /* multiplier used by hash() */
nodeptr bin[NHASH];     /* bucket heads */

/*
 * hash -- map a NUL-terminated string to a bucket index.
 *
 * Folds every character into an unsigned accumulator (multiply by MULT,
 * add the character) and reduces the result modulo NHASH, so the value
 * returned is always in the range 0 .. NHASH-1.
 */
unsigned int hash(char *p)
{
    unsigned int h = 0;

    while (*p != '\0') {
        h = h * MULT + *p;
        p++;
    }
    return h % NHASH;
}

#define NODEGROUP 1000
int nodesleft = 0;
nodeptr freenode;

nodeptr nmalloc()
{   if (nodesleft == 0) {
        freenode = malloc(NODEGROUP*sizeof(node));
        nodesleft = NODEGROUP;
    }
    nodesleft--;
    return freenode++;
}

#define CHARGROUP 10000 /* extra bytes requested whenever a new chunk is needed */
int charsleft = 0;      /* unused bytes remaining in the current chunk */
char *freechar;         /* first unused byte of the current chunk */

/*
 * smalloc -- return a pointer to n bytes of string storage.
 *
 * Storage is carved sequentially out of a chunk obtained from malloc().
 * When the current chunk has fewer than n bytes left, a fresh chunk of
 * n + CHARGROUP bytes is requested and whatever remained of the old
 * chunk is abandoned. Storage is never returned individually.
 *
 * If the underlying malloc() fails, a diagnostic is written to stderr
 * and the program exits with EXIT_FAILURE.
 */
char *smalloc(int n)
{
    char *result;

    if (charsleft < n) {
        freechar = malloc(n + CHARGROUP);
        if (freechar == NULL) {
            perror("smalloc");
            exit(EXIT_FAILURE);
        }
        charsleft = n + CHARGROUP;
    }
    result = freechar;
    charsleft -= n;
    freechar += n;
    return result;
}

/*
 * incword -- record one more occurrence of the word s.
 *
 * Walks the bucket chain selected by hash(s). If an entry with the same
 * text exists its count is incremented; otherwise a new entry holding a
 * private copy of s and a count of 1 is pushed onto the front of the
 * chain.
 */
void incword(char *s)
{
    int h = hash(s);
    nodeptr p = bin[h];

    while (p != NULL) {
        if (strcmp(s, p->word) == 0) {
            p->count++;
            return;
        }
        p = p->next;
    }

    /* Not found: build a new entry and link it in at the head. */
    p = nmalloc();
    p->word = smalloc(strlen(s) + 1);
    strcpy(p->word, s);
    p->count = 1;
    p->next = bin[h];
    bin[h] = p;
}

/*
 * Read whitespace-separated words from standard input, count them with
 * incword(), then print every "word count" pair, one per line, in
 * bucket order.
 */
int main(void)
{
    int i;
    nodeptr p;
    char buf[100];

    for (i = 0; i < NHASH; i++)
        bin[i] = NULL;
    /*
     * The field width keeps scanf from writing past buf: at most 99
     * characters plus the terminating NUL are stored. A longer run of
     * non-space characters is read as several consecutive words.
     */
    while (scanf("%99s", buf) == 1)
        incword(buf);
    for (i = 0; i < NHASH; i++)
        for (p = bin[i]; p != NULL; p = p->next)
            printf("%s %d\n", p->word, p->count);
    return 0;
}

15.2 短语

/* Copyright (C) 1999 Lucent Technologies */
/* From 'Programming Pearls' by Jon Bentley */

/* longdup.c -- Print longest string duplicated M times */

#include <stdlib.h>
#include <string.h>
#include <stdio.h>

/*
 * pstrcmp -- qsort() comparison function for an array of char pointers.
 *
 * p and q each point to an array element (a char *); the strings those
 * elements point to are compared with strcmp(). The parameters are
 * declared const void * so the function has exactly the type qsort()
 * expects; callers that pass char ** arguments still work because any
 * object pointer converts implicitly to const void *.
 */
int pstrcmp(const void *p, const void *q)
{
    return strcmp(*(char *const *)p, *(char *const *)q);
}

/*
 * comlen -- return the length of the common prefix of the strings p and q,
 * i.e. the number of leading characters that are equal in both before
 * p ends or the strings differ.
 */
int comlen(char *p, char *q)
{
    int len;

    for (len = 0; p[len] != '\0' && p[len] == q[len]; len++)
        ;
    return len;
}

#define M 1             /* compare each sorted suffix with the one M places later */
#define MAXN 5000000    /* capacity of the text buffer, including the final NUL */
char c[MAXN], *a[MAXN]; /* c: input text; a[i]: pointer to the suffix starting at c[i] */

/*
 * Read standard input into c, build an array of pointers to every
 * suffix, sort the pointers, and print the longest prefix shared by two
 * suffixes that lie M positions apart in sorted order.
 *
 * Both the unsorted and the sorted suffix arrays are printed first as
 * trace output; every suffix is written in full, so the amount of trace
 * output grows quadratically with the input size.
 */
int main(void)
{
    int i, ch, n = 0, maxi = 0, maxlen = -1;

    /* Stop one short of MAXN so the terminating NUL always fits. */
    while (n < MAXN - 1 && (ch = getchar()) != EOF) {
        a[n] = &c[n];
        c[n++] = ch;
    }
    c[n] = 0;
    for (i = 0; i < n; i++)
        printf("a[%d] = %s", i, a[i]);

    qsort(a, n, sizeof(char *), pstrcmp);
    for (i = 0; i < n; i++)
        printf("a[%d] = %s\n", i, a[i]);

    for (i = 0; i < n - M; i++) {
        int len = comlen(a[i], a[i+M]);
        if (len > maxlen) {
            maxlen = len;
            maxi = i;
        }
    }

    /*
     * With fewer than M+1 characters of input no pair was compared and
     * maxlen is still -1; print an empty line in that case rather than
     * reading a[] at an index that was never set.
     */
    if (maxlen > 0)
        printf("%.*s\n", maxlen, a[maxi]);
    else
        putchar('\n');
    return 0;
}

15.3 生成文本

/* Copyright (C) 2000 Lucent Technologies */
/* Modified from markov.c in 'Programming Pearls' by Jon Bentley */

/* markovlet.c -- generate letter-level random text from input text
    Alg: Store text in an array on input
         Scan complete text for each output character
            (Randomly select one matching k-gram)
 */

#include <stdio.h>
#include <stdlib.h>

char x[5000000];    /* the complete input text, NUL-terminated */

/*
 * Generate up to 2000 characters of letter-level random text.
 *
 * The whole input is stored in x. For every output character the text
 * is scanned for all occurrences of the current k-character context
 * (the k characters starting at p); one occurrence is chosen uniformly
 * at random and the character that follows it is printed. The context
 * then slides forward by one character. Generation stops early when the
 * chosen occurrence is followed by the end of the text.
 */
int main(void)
{
    int c, i, eqsofar, max, n = 0, k = 5;
    char *p, *nextp, *q;

    /* Stop one short of the buffer size so the final NUL always fits. */
    while (n < (int)sizeof x - 1 && (c = getchar()) != EOF)
        x[n++] = c;
    x[n] = 0;

    /* With fewer than k characters there is no context to match. */
    if (n < k)
        return 0;

    p = x;
    nextp = x;
    srand(1);
    for (max = 2000; max > 0; max--) {
        eqsofar = 0;
        for (q = x; q < x + n - k + 1; q++) {
            for (i = 0; i < k && *(p+i) == *(q+i); i++)
                ;
            /*
             * The m-th match replaces the current choice with
             * probability 1/m, so every match is equally likely to be
             * the one left selected after the scan.
             */
            if (i == k)
                if (rand() % ++eqsofar == 0)
                    nextp = q;
        }
        c = *(nextp+k);
        if (c == 0)
            break;
        putchar(c);
        p = nextp+1;
    }
    return 0;
}

/* Copyright (C) 1999 Lucent Technologies */
/* From 'Programming Pearls' by Jon Bentley */

/* markov.c -- generate random text from input document */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

char inputchars[4300000];   /* all input words, stored back to back, each NUL-terminated */
char *word[800000];         /* word[i] points at the start of the i-th word in inputchars */
int nword = 0;              /* number of words read */
int k = 2;                  /* number of consecutive words that form a phrase */

/*
 * wordncmp -- compare the k-word phrases starting at p and q.
 *
 * A phrase is k consecutive NUL-terminated words. Returns 0 when all k
 * words are identical; otherwise returns the difference between the
 * first pair of characters that differ.
 */
int wordncmp(char *p, char* q)
{
    int remaining = k;

    while (*p == *q) {
        if (*p == 0) {
            /* reached the end of one word in both phrases */
            remaining--;
            if (remaining == 0)
                return 0;
        }
        p++;
        q++;
    }
    return *p - *q;
}

/*
 * sortcmp -- qsort() comparison function for the word[] array.
 *
 * p and q each point to an array element (a char *); the phrases those
 * elements point to are compared with wordncmp(). The parameters are
 * declared const void * so the function has exactly the type qsort()
 * expects; callers that pass char ** arguments still work because any
 * object pointer converts implicitly to const void *.
 */
int sortcmp(const void *p, const void *q)
{
    return wordncmp(*(char *const *)p, *(char *const *)q);
}

/*
 * skip -- advance p past n NUL terminators.
 *
 * Returns a pointer to the character just after the n-th NUL byte found
 * at or after p; with n <= 0 the pointer is returned unchanged.
 */
char *skip(char *p, int n)
{
    while (n > 0) {
        if (*p++ == 0)
            n--;
    }
    return p;
}

/*
 * Generate up to 10000 words of random text from the input document.
 *
 * All words are read into inputchars with word[] pointing at each one,
 * followed by k empty words that mark the end of the text. The pointers
 * are sorted by the k-word phrase they start. For every output word the
 * current phrase is located with a binary search, one of its
 * occurrences is chosen at random, and the word following that
 * occurrence is printed; the phrase then slides forward by one word.
 *
 * Reading stops early when either inputchars or word[] is full, so the
 * arrays cannot be overrun. Empty input produces no output.
 */
int main(void)
{
    int i, wordsleft = 10000, l, m, u;
    char *phrase, *p;
    char fmt[32];
    const int maxwords = (int)(sizeof word / sizeof word[0]);
    /* Words must end at or before limit; the k bytes after it hold the end marker. */
    char *const limit = inputchars + sizeof inputchars - k;

    word[0] = inputchars;
    while (nword + 1 < maxwords && word[nword] + 1 < limit) {
        /*
         * Build a "%<width>s" conversion whose width is the space left
         * for characters (one byte is kept for the terminating NUL).
         * The format comes from this arithmetic only, never from input.
         */
        snprintf(fmt, sizeof fmt, "%%%lds", (long)(limit - word[nword] - 1));
        if (scanf(fmt, word[nword]) != 1)
            break;
        word[nword+1] = word[nword] + strlen(word[nword]) + 1;
        nword++;
    }
    if (nword == 0)
        return 0;

    for (i = 0; i < k; i++)
        word[nword][i] = 0;
    /* Echo the opening phrase; only word[0..nword] are valid pointers. */
    for (i = 0; i < k && i <= nword; i++)
        printf("%s\n", word[i]);
    qsort(word, nword, sizeof(word[0]), sortcmp);
    phrase = inputchars;
    for ( ; wordsleft > 0; wordsleft--) {
        /* Binary search for the first entry that is not less than phrase. */
        l = -1;
        u = nword;
        while (l+1 != u) {
            m = (l + u) / 2;
            if (wordncmp(word[m], phrase) < 0)
                l = m;
            else
                u = m;
        }
        /*
         * Walk the run of equal phrases; the (i+1)-th one replaces the
         * current choice with probability 1/(i+1), so each occurrence
         * is equally likely to end up selected.
         */
        p = NULL;
        for (i = 0; u + i <= nword && wordncmp(phrase, word[u+i]) == 0; i++)
            if (rand() % (i+1) == 0)
                p = word[u+i];
        if (p == NULL)
            break;
        phrase = skip(p, 1);
        if (strlen(skip(phrase, k-1)) == 0)
            break;
        printf("%s\n", skip(phrase, k-1));
    }
    return 0;
}

/* Copyright (C) 1999 Lucent Technologies */
/* From 'Programming Pearls' by Jon Bentley */

/* markovhash.c -- generate random text, sped up with hash tables */

/* For storage efficiency (and also to minimize changes from markov.c),
   the hash table is implemented in the integer array next.
   If bin[i]=j, then word[j] is the first element in the list,
   word[next[j]] is the next element, and so on.
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

char inputchars[4300000];   /* all input words, stored back to back, each NUL-terminated */
#define MAXWORDS 800000
char *word[MAXWORDS];       /* word[i] points at the start of the i-th word in inputchars */
int nword = 0;              /* number of words read */
int k = 2;                  /* number of consecutive words that form a phrase */

int next[MAXWORDS];         /* next[j]: index of the word after word[j] in its hash chain */
#define NHASH 499979
int bin[NHASH];             /* bin[h]: index of the first word in chain h */
#define MULT 31

/*
 * hash -- hash the k-word phrase starting at ptr.
 *
 * Every byte of the k consecutive NUL-terminated words, terminators
 * included, is folded into an unsigned accumulator (multiply by MULT,
 * add the byte); the result is reduced modulo NHASH, so the value
 * returned is always in the range 0 .. NHASH-1. Bytes are read as
 * unsigned char so the hash does not depend on the signedness of char.
 */
unsigned int hash(char *ptr)
{
    unsigned int h = 0;
    /* explicit cast: char * and unsigned char * are distinct pointer types */
    const unsigned char *p = (const unsigned char *)ptr;
    int n;

    for (n = k; n > 0; p++) {
        h = MULT * h + *p;
        if (*p == 0)
            n--;
    }
    return h % NHASH;
}

/*
 * wordncmp -- compare the k-word phrases starting at p and q.
 *
 * A phrase is k consecutive NUL-terminated words. Returns 0 when all k
 * words are identical; otherwise returns the difference between the
 * first pair of characters that differ.
 */
int wordncmp(char *p, char* q)
{
    int remaining = k;

    while (*p == *q) {
        if (*p == 0) {
            /* reached the end of one word in both phrases */
            remaining--;
            if (remaining == 0)
                return 0;
        }
        p++;
        q++;
    }
    return *p - *q;
}

/*
 * sortcmp -- qsort() comparison function for the word[] array.
 *
 * p and q each point to an array element (a char *); the phrases those
 * elements point to are compared with wordncmp(). The parameters are
 * declared const void * so the function has exactly the type qsort()
 * expects; callers that pass char ** arguments still work because any
 * object pointer converts implicitly to const void *.
 */
int sortcmp(const void *p, const void *q)
{
    return wordncmp(*(char *const *)p, *(char *const *)q);
}

/*
 * skip -- advance p past n NUL terminators.
 *
 * Returns a pointer to the character just after the n-th NUL byte found
 * at or after p; with n <= 0 the pointer is returned unchanged.
 */
char *skip(char *p, int n)
{
    while (n > 0) {
        if (*p++ == 0)
            n--;
    }
    return p;
}

int main()
{   int i, wordsleft = 10000, j;
    char *phrase, *p;
    word[0] = inputchars;
    while (scanf("%s", word[nword]) != EOF) {
        word[nword+1] = word[nword] + strlen(word[nword]) + 1;
        nword++;
    }
    for (i = 0; i < k; i++)
        word[nword][i] = 0;
    for (i = 0; i < NHASH; i++)
        bin[i] = -1;
    for (i = 0; i <= nword - k; i++) { /* check */
        j = hash(word[i]);
        next[i] = bin[j];
        bin[j] = i;
    }
    for (i = 0; i < k; i++)
        printf("%s\n", word[i]);
    phrase = inputchars;
    for ( ; wordsleft > 0; wordsleft--) {
        i = 0;
        for (j = bin[hash(phrase)]; j >= 0; j = next[j])
            if ((wordncmp(phrase, word[j]) == 0)
                && (rand() % (++i) == 0))
                p = word[j];
        phrase = skip(p, 1);
        if (strlen(skip(phrase, k-1)) == 0)
            break;
        printf("%s\n", skip(phrase, k-1));
    }
    return 0;
}

Pearls15

15.1为文档中包含的单词生成一个列表

对文档中每个单词出现的次数做统计

使用自定义的散列表，对文档中每个单词出现的次数做统计

15.3 生成文本

你可能感兴趣的:(Pearls15)