Reading Notes on "Mastering Natural Language Processing with Python" (Deepti Chopra), Chapter 2

"Mastering Natural Language Processing with Python"

Deepti Chopra (India)
Translated by Wang Wei

Chapter 2: Statistical Language Modeling

Applications of computational linguistics include machine translation, speech recognition, intelligent web search, information retrieval, intelligent spell checking, and so on.

2.1 Understanding Word Frequency

Generating unigrams from the Alpino corpus:
import nltk
from nltk.util import ngrams
from nltk.corpus import alpino
print(alpino.words())
unigrams=ngrams(alpino.words(),1)
for i in unigrams:
	print(i)
Generating quadgrams (fourgrams) from the Alpino corpus:
import nltk
from nltk.util import ngrams
from nltk.corpus import alpino
print(alpino.words())
quadgrams=ngrams(alpino.words(),4)
for i in quadgrams:
	print(i)
Finding bigrams in a text:
import nltk
from nltk.collocations import BigramCollocationFinder
from nltk.corpus import webtext
from nltk.metrics import BigramAssocMeasures
tokens=[t.lower() for t in webtext.words('grail.txt')]
words=BigramCollocationFinder.from_words(tokens)
print(words.nbest(BigramAssocMeasures.likelihood_ratio, 10))
Adding a word filter to the above code (to remove stopwords and punctuation):
from nltk.corpus import stopwords
from nltk.corpus import webtext
from nltk.collocations import BigramCollocationFinder
from nltk.metrics import BigramAssocMeasures
stopset = set(stopwords.words('english'))
stops_filter = lambda w: len(w) < 3 or w in stopset
tokens=[t.lower() for t in webtext.words('grail.txt')]
words=BigramCollocationFinder.from_words(tokens)
words.apply_word_filter(stops_filter)
print(words.nbest(BigramAssocMeasures.likelihood_ratio, 10))
Generating bigrams using a collocation finder:
import nltk
from nltk.collocations import *
text1="Hardwork is the key to success. Never give up!"
word = nltk.wordpunct_tokenize(text1)
finder = BigramCollocationFinder.from_words(word)
bigram_measures = nltk.collocations.BigramAssocMeasures()
value = finder.score_ngrams(bigram_measures.raw_freq)
print(sorted(bigram for bigram, score in value))
Generating bigrams from the Alpino corpus:
import nltk
from nltk.util import ngrams
from nltk.corpus import alpino
print(alpino.words())
bigrams_tokens=ngrams(alpino.words(),2)
for i in bigrams_tokens:
	print(i)
Generating trigrams from the Alpino corpus:
import nltk
from nltk.util import ngrams
from nltk.corpus import alpino
print(alpino.words())
trigrams_tokens=ngrams(alpino.words(),3)
for i in trigrams_tokens:
	print(i)
Generating fourgrams and their frequencies:
import nltk
from nltk.collocations import *
text="Hello how are you doing ? I hope you find the book interesting"
tokens=nltk.wordpunct_tokenize(text)
fourgrams=nltk.collocations.QuadgramCollocationFinder.from_words(tokens)
for fourgram, freq in fourgrams.ngram_fd.items():
	print(fourgram, freq)
Generating ngrams (here, fivegrams):
import nltk
from nltk.util import ngrams
sent=" Hello , please read the book thoroughly . If you have any queries , then don't hesitate to ask . There is no shortcut to success ."
n=5
fivegrams=ngrams(sent.split(),n)
for grams in fivegrams:
	print(grams)

2.1.1 Developing MLE for a Given Text

The maximum entropy model:
from __future__ import print_function,unicode_literals
__docformat__='epytext en'
try:
	import numpy
except ImportError:
	pass
import tempfile
import os
from collections import defaultdict
from nltk import compat
from nltk.data import gzip_open_unicode
from nltk.util import OrderedDict
from nltk.probability import DictionaryProbDist
from nltk.classify.api import ClassifierI
from nltk.classify.util import CutoffChecker,accuracy,log_likelihood
from nltk.classify.megam import (call_megam,write_megam_file,parse_megam_weights)
from nltk.classify.tadm import call_tadm, write_tadm_file, parse_tadm_weights
Types of probability distributions:
- Derived probability distribution: obtained from a frequency distribution
- Analytic probability distribution: obtained from parameters
Computing the probability of each token based on its frequency in the frequency distribution:
class MLEProbDist(ProbDistI):
	def __init__(self, freqdist, bins=None):
		self._freqdist = freqdist
	def freqdist(self):
		"""This function finds the frequency distribution on which the probability distribution is based."""
		return self._freqdist
	def prob(self, sample):
		return self._freqdist.freq(sample)
	def max(self):
		return self._freqdist.max()
	def samples(self):
		return self._freqdist.keys()
	def __repr__(self):
		"""It will return the string representation of the ProbDist."""
		return '<MLEProbDist based on %d samples>' % self._freqdist.N()
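A quick usage sketch for the class above (my own toy example, not from the book): nltk.probability already provides FreqDist and MLEProbDist, so a derived distribution can be built directly from the counts.
import nltk
from nltk.probability import FreqDist, MLEProbDist

# Toy frequency distribution (hypothetical sentence, for illustration only)
fd = FreqDist("the cat sat on the mat".split())
mle_pd = MLEProbDist(fd)
print(mle_pd.prob('the'))      # 2/6, the relative frequency of 'the'
print(mle_pd.max())            # the most frequent sample, 'the'
print(list(mle_pd.samples()))  # all observed samples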

class LidstoneProbDist(ProbDistI):

	"""This class derives a smoothed probability distribution from a frequency distribution.
	The smoothing is controlled by a real number gamma, whose value typically lies between
	0 and 1. LidstoneProbDist computes the probability of a sample with count c, total
	number of observed outcomes N, and number of sample values B that can be obtained from
	the probability distribution, using the formula: (c + gamma) / (N + B*gamma).
	This means that gamma is added to the count of every possible sample outcome, and the
	MLE is then computed from the resulting frequency distribution:"""

	SUM_TO_ONE = False
	def __init__(self, freqdist, gamma, bins=None):
		"""
		Lidstone is used to compute the probability distribution obtained from freqdist.
		The parameter freqdist is the frequency distribution on which the probability
		estimates are based.
		The parameter bins is the number of sample values that can be obtained from the
		probability distribution; the probabilities must sum to 1:
		"""
		if (bins == 0) or (bins is None and freqdist.N() == 0):
			name = self.__class__.__name__[:-8]
			raise ValueError('A %s probability distribution ' % name + 'must have at least one bin.')
		if (bins is not None) and (bins < freqdist.B()):
			name = self.__class__.__name__[:-8]
			raise ValueError('\nThe number of bins in a %s distribution ' % name + '(%d) must be greater than or equal to\n' % bins + 'the number of bins in the FreqDist used ' + 'to create it (%d).' % freqdist.B())
		self._freqdist = freqdist
		self._gamma = float(gamma)
		self._N = self._freqdist.N()
		if bins is None:
			bins = freqdist.B()
		self._bins = bins
		self._divisor = self._N + bins * gamma
		if self._divisor == 0.0:
			# In extreme cases we force the probability to be 0,
			# which it will be, since the count will be 0:
			self._gamma = 0
			self._divisor = 1
	def freqdist(self):
		"""
		This function obtains the frequency distribution on which the probability
		distribution is based:
		"""
		return self._freqdist
	def prob(self, sample):
		c = self._freqdist[sample]
		return (c + self._gamma) / self._divisor
	def max(self):
		# To obtain the most probable sample, choose the one
		# that occurs most frequently.
		return self._freqdist.max()
	def samples(self):
		return self._freqdist.keys()
	def discount(self):
		gb = self._gamma * self._bins
		return gb / (self._N + gb)
	def __repr__(self):
		"""
		String representation of the ProbDist is obtained.
		"""
		return '<LidstoneProbDist based on %d samples>' % self._freqdist.N()
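A minimal usage sketch of the (c + gamma) / (N + B*gamma) formula above (my own example; the sentence, gamma value, and bins value are made up):
from nltk.probability import FreqDist, LidstoneProbDist

fd = FreqDist("the cat sat on the mat".split())   # N = 6 tokens, B = 5 types
lid = LidstoneProbDist(fd, 0.2, bins=6)           # reserve one extra bin for unseen words
print(lid.prob('the'))   # (2 + 0.2) / (6 + 6*0.2)
print(lid.prob('dog'))   # unseen word: (0 + 0.2) / (6 + 6*0.2)
print(lid.discount())    # fraction of probability mass moved by the smoothing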

class LaplaceProbDist(LidstoneProbDist):

	"""
	This class derives a smoothed probability distribution from a frequency distribution.
	It computes the probability of a sample with count c, total number of observed
	outcomes N, and number of sample values B that can be generated, using the formula:
	(c + 1) / (N + B)
	This means that 1 is added to the count of every possible sample outcome, and the
	maximum likelihood estimate is obtained from the resulting frequency distribution:
	"""
	def __init__(self, freqdist, bins=None):
		"""
		LaplaceProbDist is used to obtain the probability distribution that generates freqdist.
		The parameter freqdist is the frequency distribution on which the probability
		estimates are based.
		The parameter bins can be regarded as the number of sample values that can be
		generated. The probabilities must sum to 1:
		"""
		LidstoneProbDist.__init__(self, freqdist, 1, bins)
	def __repr__(self):
		"""
		String representation of the ProbDist is obtained.
		"""
		return '<LaplaceProbDist based on %d samples>' % self._freqdist.N()


class ELEProbDist(LidstoneProbDist):

	"""
	This class derives a smoothed probability distribution from a frequency distribution.
	It computes the probability of a sample with count c, total number of observed
	outcomes N, and number of sample values B that can be generated, using the formula:
	(c + 0.5) / (N + B/2)
	This means that 0.5 is added to the count of every possible sample outcome, and the
	maximum likelihood estimate is obtained from the resulting frequency distribution:
	"""
	def __init__(self, freqdist, bins=None):
		"""
		The expected likelihood estimate is used to obtain the probability distribution
		that generates freqdist. The parameter freqdist is the frequency distribution on
		which the probability estimates are based.
		The parameter bins can be regarded as the number of sample values that can be
		generated. The probabilities must sum to 1:
		"""
		LidstoneProbDist.__init__(self, freqdist, 0.5, bins)
	def __repr__(self):
		"""
		String representation of the ProbDist is obtained.
		"""
		return '<ELEProbDist based on %d samples>' % self._freqdist.N()
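Since both classes only change the gamma passed to LidstoneProbDist, a small comparison sketch (again with a made-up sentence and bins value) shows the difference between the (c+1)/(N+B) and (c+0.5)/(N+B/2) estimates:
from nltk.probability import FreqDist, LaplaceProbDist, ELEProbDist

fd = FreqDist("the cat sat on the mat".split())
lap = LaplaceProbDist(fd, bins=6)   # gamma = 1:   (c + 1)   / (N + B)
ele = ELEProbDist(fd, bins=6)       # gamma = 0.5: (c + 0.5) / (N + B/2)
print(lap.prob('the'), ele.prob('the'))   # 3/12 vs 2.5/9
print(lap.prob('dog'), ele.prob('dog'))   # unseen words get non-zero probability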


class WittenBellProbDist(ProbDistI):

	"""
	The WittenBellProbDist class is used to obtain a probability distribution. Based on
	the sample frequencies seen so far, it assigns a uniform probability mass to as-yet-unseen
	samples. The probability mass for unseen samples is computed as:
	T / (N + T)
	Here, T is the number of observed sample types and N is the total number of observed
	events. This probability mass equals the maximum likelihood estimate of a new sample
	occurring. All probabilities sum to 1:
	Here,
		p = T / Z (N + T), if count = 0
		p = c / (N + T), otherwise
	"""
	def __init__(self, freqdist, bins=None):
		"""
		This code obtains the probability distribution. The distribution assigns a uniform
		probability mass to unknown samples. The probability mass for a sample is computed
		as:

		T / (N + T)

		Here, T is the number of observed sample types and N is the total number of observed
		events. This probability mass equals the maximum likelihood estimate of a new sample
		occurring. All probabilities sum to 1:

		p = T / Z (N + T), if count = 0
		p = c / (N + T), otherwise

		Z is a normalization factor computed from these values and a bins value. The parameter
		freqdist is used to estimate the frequency counts from which the probability
		distribution is obtained. The parameter bins is the number of possible sample types:
		"""
		assert bins is None or bins >= freqdist.B(), \
			'bins parameter must not be less than %d=freqdist.B()' % freqdist.B()
		if bins is None:
			bins = freqdist.B()
		self._freqdist = freqdist
		self._T = self._freqdist.B()
		self._Z = bins - self._freqdist.B()
		self._N = self._freqdist.N()
		# self._P0 is P(0), precalculated for efficiency:
		if self._N == 0:
			# if freqdist is empty, we approximate P(0) by a UniformProbDist:
			self._P0 = 1.0 / self._Z
		else:
			self._P0 = self._T / float(self._Z * (self._N + self._T))
	def prob(self, sample):
		# inherit docs from ProbDistI
		c = self._freqdist[sample]
		return (c / float(self._N + self._T) if c != 0 else self._P0)
	def max(self):
		return self._freqdist.max()
	def samples(self):
		return self._freqdist.keys()
	def freqdist(self):
		return self._freqdist
	def discount(self):
		raise NotImplementedError()
	def __repr__(self):
		"""
		String representation of the ProbDist is obtained.
		"""
		return '<WittenBellProbDist based on %d samples>' % self._freqdist.N()
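A small usage sketch for Witten-Bell smoothing (my own example; the sentence and bins value are made up). With T observed types and N observed tokens, seen words get c/(N+T) and unseen words share T/(Z*(N+T)):
from nltk.probability import FreqDist, WittenBellProbDist

fd = FreqDist("the cat sat on the mat".split())   # T = 5 types, N = 6 tokens
wb = WittenBellProbDist(fd, bins=6)               # Z = bins - T = 1 unseen bin
print(wb.prob('the'))   # 2 / (6 + 5)
print(wb.prob('dog'))   # all unseen mass goes to the single unseen bin: 5 / (1 * (6 + 5))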
Running tests with maximum likelihood estimation (train_and_test is the helper defined in section 2.1.2 below, and mle here wraps the unsmoothed MLEProbDist):
import nltk
from nltk.probability import *
mle = lambda fd, bins: MLEProbDist(fd)
print(train_and_test(mle))
print(train_and_test(LaplaceProbDist))
print(train_and_test(ELEProbDist))
def lidstone(gamma):
	return lambda fd, bins:LidstoneProbDist(fd,gamma,bins)
print(train_and_test(lidstone(0.1)))
print(train_and_test(lidstone(0.5)))
print(train_and_test(lidstone(1.0)))

2.1.2 Hidden Markov Models

Code that uses the Brown corpus to run tests with an HMM estimator:
import nltk
corpus = nltk.corpus.brown.tagged_sents(categories='adventure')[:700]
print(len(corpus))
from nltk.util import unique_list
tag_set = unique_list(tag for sent in corpus for (word,tag) in sent)
print(len(tag_set))
symbols = unique_list(word for sent in corpus for (word,tag) in sent)
print(len(symbols))
trainer = nltk.tag.HiddenMarkovModelTrainer(tag_set, symbols)
train_corpus = []
test_corpus = []
for i in range(len(corpus)):
    if i % 10:
        train_corpus +=[corpus[i]]
    else:
        test_corpus +=[corpus[i]]
print(len(train_corpus))
print(len(test_corpus))
def train_and_test(est):
	hmm = trainer.train_supervised(train_corpus, estimator=est)
	print('%.2f%%' % (100 * hmm.evaluate(test_corpus)))

2.2 Applying Smoothing to the MLE Model

Smoothing is used to handle words that have not appeared before.

2.2.1 Additive Smoothing

Code for additive smoothing:
import nltk
corpus=u" hello how are you doing ? Hope you find the book interesting. ".split()
sentence=u"how are you doing".split()
vocabulary=set(corpus)
print(len(vocabulary))
cfd = nltk.ConditionalFreqDist(nltk.bigrams(corpus))

#The corpus counts of each bigram in the sentence:
print([cfd[a][b] for (a,b) in nltk.bigrams(sentence)])

#The counts for each word in the sentence:
print([cfd[a].N() for (a,b) in nltk.bigrams(sentence)])

#There is already a FreqDist method for MLE probability:
print([cfd[a].freq(b) for (a,b) in nltk.bigrams(sentence)])

#Laplace smoothing of each bigram count:
print([1 + cfd[a][b] for (a,b) in nltk.bigrams(sentence)])

#we need to normalize the counts for each word:
print([len(vocabulary) + cfd[a].N() for (a,b) in nltk.bigrams(sentence)])

#the smoothed Laplace probability for each bigram:
print([1.0 * (1+cfd[a][b]) / (len(vocabulary)+cfd[a].N()) for (a,b) in nltk.bigrams(sentence)])
Another way to perform add-one smoothing, that is, to generate a Laplace probability distribution:
#MLEProbDist is the unsmoothed probability distribution:
cpd_mle = nltk.ConditionalProbDist(cfd, nltk.MLEProbDist, bins=len(vocabulary))

#Now we can get the MLE probabilities by using the .prob method:
print([cpd_mle[a].prob(b) for (a,b) in nltk.bigrams(sentence)])

#LaplaceProbDist is the add_one smoothed ProbDist:
cpd_laplace = nltk.ConditionalProbDist(cfd, nltk.LaplaceProbDist, bins=len(vocabulary))

#Getting the Laplace probabilities is the same as for MLE:
print([cpd_laplace[a].prob(b) for (a,b) in nltk.bigrams(sentence)])

2.2.2 Good-Turing Smoothing

Simple Good-Turing smoothing:
class SimpleGoodTuringProbDist(ProbDistI):
	"""Given a pair (pi, qi), where pi refers to the frequency and qi refers to the
	frequency of frequency, our aim is to minimize the square variation. E(p) and E(q)
	are the means of pi and qi.
	- slope: b = sigma ((pi-E(p))(qi-E(q))) / sigma ((pi-E(p))(pi-E(p)))
	- intercept: a = E(q) - b.E(p)"""

	SUM_TO_ONE = False
	def __init__(self, freqdist, bins=None):
		"""param freqdist refers to the count of frequency from which the probability
		distribution is estimated.
		param bins is used to estimate the possible number of samples."""
		assert bins is None or bins > freqdist.B(), \
			'bins parameter must not be less than %d=freqdist.B()+1' % (freqdist.B()+1)
		if bins is None:
			bins = freqdist.B() + 1
		self._freqdist = freqdist
		self._bins = bins
		r, nr = self._r_Nr()
		self.find_best_fit(r, nr)
		self._switch(r, nr)
		self._renormalize(r, nr)

	def _r_Nr_non_zero(self):
		r_Nr = self._freqdist.r_Nr()
		del r_Nr[0]
		return r_Nr

	def _r_Nr(self):
		"""Split the frequency distribution into two lists (r, Nr), where Nr(r) > 0"""
		nonzero = self._r_Nr_non_zero()
		if not nonzero:
			return [], []
		return zip(*sorted(nonzero.items()))

	def find_best_fit(self, r, nr):
		"""Use simple linear regression to tune the parameters self._slope and
		self._intercept in the log-log space based on count and Nr(count)
		(work in log space to avoid floating point underflow)."""
		# For higher sample frequencies the data points become horizontal
		# along the line Nr=1. To create a more evident linear model in log-log
		# space, we average positive Nr values with the surrounding zero
		# values. (Church and Gale, 1991)
		if not r or not nr:
			# Empty r or nr?
			return
		zr = []
		for j in range(len(r)):
			i = (r[j-1] if j > 0 else 0)
			k = (2 * r[j] - i if j == len(r) - 1 else r[j+1])
			zr_ = 2.0 * nr[j] / (k - i)
			zr.append(zr_)

		log_r = [math.log(i) for i in r]
		log_zr = [math.log(i) for i in zr]

		xy_cov = x_var = 0.0
		x_mean = 1.0 * sum(log_r) / len(log_r)
		y_mean = 1.0 * sum(log_zr) / len(log_zr)
		for (x, y) in zip(log_r, log_zr):
			xy_cov += (x - x_mean) * (y - y_mean)
			x_var += (x - x_mean)**2
		self._slope = (xy_cov / x_var if x_var != 0 else 0.0)
		if self._slope >= -1:
			warnings.warn('SimpleGoodTuring did not find a proper best fit '
				'line for smoothing probabilities of occurrences. '
				'The probability estimates are likely to be unreliable.')
		self._intercept = y_mean - self._slope * x_mean
	def _switch(self, r, nr):
		"""Calculate the r frontier where we must switch from Nr to Sr
		when estimating E[Nr]."""
		for i, r_ in enumerate(r):
			if len(r) == i + 1 or r[i+1] != r_ + 1:
				# we are at the end of r, or there is a gap in r
				self._switch_at = r_
				break

			Sr = self.smoothedNr
			smooth_r_star = (r_ + 1) * Sr(r_+1) / Sr(r_)
			unsmooth_r_star = 1.0 * (r_ + 1) * nr[i+1] / nr[i]

			std = math.sqrt(self._variance(r_, nr[i], nr[i+1]))
			if abs(unsmooth_r_star - smooth_r_star) <= 1.96 * std:
				self._switch_at = r_
				break

	def _variance(self, r, nr, nr_1):
		r = float(r)
		nr = float(nr)
		nr_1 = float(nr_1)
		return (r + 1.0)**2 * (nr_1 / nr**2) * (1.0 + nr_1 / nr)

	def _renormalize(self, r, nr):
		"""Renormalization is essential to ensure that a proper probability distribution
		is obtained. It is done by estimating the probability of unseen samples as N(1)/N
		and then renormalizing all the probabilities of previously seen samples:"""
		prob_cov = 0.0
		for r_, nr_ in zip(r, nr):
			prob_cov += nr_ * self._prob_measure(r_)
		if prob_cov:
			self._renormal = (1 - self._prob_measure(0)) / prob_cov

	def smoothedNr(self, r):
		"""Return the number of samples with count r."""
		# Nr = a*r^b (with b < -1 to give the appropriate hyperbolic relationship)
		# Estimate a and b by a simple linear regression technique on the
		# logarithmic form of the equation: log Nr = a + b*log(r)
		return math.exp(self._intercept + self._slope * math.log(r))

	def prob(self, sample):
		"""Return the sample's probability."""
		count = self._freqdist[sample]
		p = self._prob_measure(count)
		if count == 0:
			if self._bins == self._freqdist.B():
				p = 0.0
			else:
				p = p / (1.0 * self._bins - self._freqdist.B())
		else:
			p = p * self._renormal
		return p

	def _prob_measure(self, count):
		if count == 0 and self._freqdist.N() == 0:
			return 1.0
		elif count == 0 and self._freqdist.N() != 0:
			return 1.0 * self._freqdist.Nr(1) / self._freqdist.N()
		if self._switch_at > count:
			Er_1 = 1.0 * self._freqdist.Nr(count+1)
			Er = 1.0 * self._freqdist.Nr(count)
		else:
			Er_1 = self.smoothedNr(count+1)
			Er = self.smoothedNr(count)

		r_star = (count + 1) * Er_1 / Er
		return r_star / self._freqdist.N()

	def check(self):
		prob_sum = 0.0
		for i in range(0, len(self._Nr)):
			prob_sum += self._Nr[i] * self._prob_measure(i) / self._renormal
		print("Probability Sum:", prob_sum)
		#assert prob_sum != 1.0, "probability sum should be one!"

	def discount(self):
		"""It is used to provide the total probability transferred from the seen events
		to the unseen events."""
		return 1.0 * self.smoothedNr(1) / self._freqdist.N()

	def max(self):
		return self._freqdist.max()

	def samples(self):
		return self._freqdist.keys()

	def freqdist(self):
		return self._freqdist

	def __repr__(self):
		"""It obtains the string representation of the ProbDist."""
		return '<SimpleGoodTuringProbDist based on %d samples>' % self._freqdist.N()
Code for Simple Good-Turing in NLTK:
gt = lambda fd, bins: SimpleGoodTuringProbDist(fd, bins=1e5)
print(train_and_test(gt))
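A standalone sketch of SimpleGoodTuringProbDist on real counts (not from the book; the corpus file and the probed words are my own choice):
from nltk.corpus import webtext
from nltk.probability import FreqDist, SimpleGoodTuringProbDist

fd = FreqDist(w.lower() for w in webtext.words('grail.txt'))
sgt = SimpleGoodTuringProbDist(fd)
print(sgt.prob('knight'))        # smoothed probability of a seen word
print(sgt.prob('unseen-word'))   # unseen words receive a small non-zero probability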

2.2.3 Kneser-Ney Smoothing

Code for Kneser-Ney smoothing in NLTK:
import nltk
corpus = [[((x[0],y[0],z[0]), (x[1],y[1],z[1])) for x, y, z in nltk.trigrams(sent)] for sent in corpus[:100]]
tag_set = unique_list(tag for sent in corpus for (word,tag) in sent)
print(len(tag_set))
symbols = unique_list(word for sent in corpus for (word,tag) in sent)
print(len(symbols))
trainer = nltk.tag.HiddenMarkovModelTrainer(tag_set, symbols)
train_corpus = []
test_corpus = []
for i in range(len(corpus)):
	if i % 10:
		train_corpus += [corpus[i]]
	else:
		test_corpus += [corpus[i]]

print(len(train_corpus))
print(len(test_corpus))
kn = lambda fd, bins: KneserNeyProbDist(fd)
print(train_and_test(kn))
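KneserNeyProbDist can also be used directly on a FreqDist of trigrams. A minimal standalone sketch (my own made-up sentence; the default discount of 0.75 is used):
import nltk
from nltk.util import trigrams
from nltk.probability import KneserNeyProbDist

sent = "the cat sat on the mat and the dog sat on the rug".split()
freq_dist = nltk.FreqDist(trigrams(sent))
kn = KneserNeyProbDist(freq_dist)
print(kn.prob(('sat', 'on', 'the')))   # smoothed probability of a seen trigram
print(kn.prob(('on', 'the', 'mat')))   # another seen trigram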

2.2.4 Witten-Bell Smoothing

print(train_and_test(WittenBellProbDist))

2.3 Developing a Back-off Mechanism for MLE

The Katz back-off model can be regarded as an efficient generative n-gram language model: given the preceding context of a token in an n-gram, the model can compute its conditional probability. According to this model, if the n-gram has been seen more than n times in the training data, the conditional probability of a token given its preceding context is proportional to the MLE of that n-gram. Otherwise, the conditional probability equals the back-off conditional probability of the (n-1)-gram.

Code for the Katz back-off model:
def prob(self, word, context):
	"""Evaluate the probability of this word in this context using Katz back-off.
	:param word: the word to get the probability of
	:type word: str
	:param context: the context the word is in
	:type context: list(str)"""
	context = tuple(context)
	if (context + (word,) in self._ngrams) or (self.n == 1):
		return self[context].prob(word)
	else:
		return self._alpha(context) * self.backoff.prob(word, context[1:])
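To make the back-off idea concrete, here is a deliberately simplified sketch (my own illustration, not the book's or NLTK's implementation): a bigram model that falls back to a scaled unigram estimate when the bigram was never seen. A real Katz model computes the back-off weight alpha from the discounted probability mass rather than using a fixed constant.
from collections import Counter
from nltk import bigrams

corpus = "the cat sat on the mat the cat lay on the rug".split()   # toy training data
unigram_counts = Counter(corpus)
bigram_counts = Counter(bigrams(corpus))
N = len(corpus)
ALPHA = 0.4   # hypothetical fixed back-off weight, for illustration only

def backoff_prob(word, prev):
    if (prev, word) in bigram_counts:
        return bigram_counts[(prev, word)] / unigram_counts[prev]   # bigram MLE
    return ALPHA * unigram_counts[word] / N                         # back off to the unigram MLE

print(backoff_prob('cat', 'the'))   # seen bigram
print(backoff_prob('rug', 'cat'))   # unseen bigram: backed-off estimate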

2.4 Applying Interpolation on Data to Get a Mix and Match

The word captivating appears five times in the training data: three times before by and twice before the. With an additive smoothing model, a and new would be given the same probability of occurring before captivating. We can develop an interpolation model that combines the unigram and bigram models.
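A minimal interpolation sketch to illustrate this (my own example; the corpus and the lambda weight are made up): the interpolated estimate is a weighted mix of the bigram MLE and the unigram MLE, so an unseen bigram still receives some probability from the unigram model.
from collections import Counter
from nltk import bigrams

corpus = "the cat sat on the mat the cat lay on the rug".split()
unigram_counts = Counter(corpus)
bigram_counts = Counter(bigrams(corpus))
N = len(corpus)
LAMBDA = 0.7   # hypothetical weight on the bigram model

def interpolated_prob(word, prev):
    bigram_mle = bigram_counts[(prev, word)] / unigram_counts[prev]
    unigram_mle = unigram_counts[word] / N
    return LAMBDA * bigram_mle + (1 - LAMBDA) * unigram_mle

print(interpolated_prob('cat', 'the'))   # strong bigram evidence
print(interpolated_prob('rug', 'the'))   # unseen bigram, but the unigram part still contributes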

2.5 Evaluating a Model through Perplexity

The code presented in the nltk.model.ngram module for evaluating the perplexity of a text is as follows:

def perplexity(self, text):
	return pow(2.0, self.entropy(text)) 
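A small worked example of the formula above (my own numbers, not from the book): if a model assigns each of the test tokens a probability of 0.25, the per-token cross-entropy is 2 bits, so the perplexity is 2**2 = 4.
import math

probs = [0.25] * 8   # hypothetical per-token probabilities assigned by a model
entropy = -sum(math.log(p, 2) for p in probs) / len(probs)
perplexity = math.pow(2.0, entropy)
print(entropy, perplexity)   # 2.0 4.0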

2.6 Applying the Metropolis-Hastings Algorithm in Language Modeling

In Markov chain Monte Carlo (MCMC), there are several ways to process posterior probabilities. One approach is to use the Metropolis-Hastings sampler. To implement the Metropolis-Hastings algorithm, we need a standard uniform distribution, a proposal distribution, and a target distribution that is proportional to the posterior probability.
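Before the full two-coin example in section 2.7, here is a minimal one-dimensional sketch of these three ingredients (my own illustration; the unnormalized target corresponds to a Beta(3, 4) density and the random-walk proposal width is arbitrary):
import numpy as np
from scipy import stats

target = lambda theta: theta**2 * (1 - theta)**3 if 0 < theta < 1 else 0   # unnormalized Beta(3, 4)
theta = 0.5
samples = []
for _ in range(5000):
    proposal = stats.norm(theta, 0.1).rvs()                 # proposal distribution (random walk)
    accept_prob = min(1, target(proposal) / target(theta))  # target ratio, capped at 1
    if np.random.rand() < accept_prob:                      # standard uniform draw decides acceptance
        theta = proposal
    samples.append(theta)
print(np.mean(samples))   # should be close to the true posterior mean 3/7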

2.7 Applying Gibbs Sampling in Language Processing

In each iteration, we draw a proposed value for the new value of each particular parameter.

Consider an example of tossing two coins, characterized by the number of heads and the number of tosses for each coin:
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
from functools import partial

def bern(theta, z, N):
	"""Bernoulli likelihood with N trials and z successes."""
	return np.clip(theta**z * (1-theta)**(N-z), 0, 1)
def bern2(theta1, theta2, z1, z2, N1, N2):
	"""Bernoulli likelihood with N trials and z successes."""
	return bern(theta1, z1, N1) * bern(theta2, z2, N2)
def make_thetas(xmin, xmax, n):
	xs = np.linspace(xmin, xmax, n)
	widths = (xs[1:] - xs[:-1]) / 2.0
	thetas = xs[:-1] + widths
	return thetas
def make_plots(X, Y, prior, likelihood, posterior, projection=None):
	fig, ax = plt.subplots(1, 3, subplot_kw=dict(projection=projection, aspect='equal'), figsize=(12, 3))
	if projection == '3d':
		ax[0].plot_surface(X, Y, prior, alpha=0.3, cmap=plt.cm.jet)
		ax[1].plot_surface(X, Y, likelihood, alpha=0.3, cmap=plt.cm.jet)
		ax[2].plot_surface(X, Y, posterior, alpha=0.3, cmap=plt.cm.jet)
	else:
		ax[0].contour(X, Y, prior)
		ax[1].contour(X, Y, likelihood)
		ax[2].contour(X, Y, posterior)
	ax[0].set_title('Prior')
	ax[1].set_title('Likelihood')
	ax[2].set_title('Posterior')
	plt.tight_layout()

thetas1 = make_thetas(0, 1, 101)
thetas2 = make_thetas(0, 1, 101)
X, Y = np.meshgrid(thetas1, thetas2)
For the Metropolis algorithm, consider the following values:
a=2
b=3

z1=11
N1=14
z2=7
N2=14

prior = lambda theta1, theta2: stats.beta(a, b).pdf(theta1) * stats.beta(a, b).pdf(theta2)
lik = partial(bern2, z1=z1, z2=z2, N1=N1, N2=N2)
target = lambda theta1, theta2: prior(theta1, theta2) * lik(theta1, theta2)

theta = np.array([0.5, 0.5])
niters = 10000
burnin = 500
sigma = np.diag([0.2, 0.2])

thetas = np.zeros((niters-burnin, 2))
for i in range(niters):
	new_theta = stats.multivariate_normal(theta, sigma).rvs()
	p = min(target(*new_theta) / target(*theta), 1)
	if np.random.rand() < p:
		theta = new_theta
	if i >= burnin:
		thetas[i-burnin] = theta
kde = stats.gaussian_kde(thetas.T)
XY = np.vstack([X.ravel(), Y.ravel()])
posterior_metropolis = kde(XY).reshape(X.shape)
make_plots(X, Y, prior(X, Y), lik(X, Y), posterior_metropolis)
make_plots(X, Y, prior(X, Y), lik(X, Y), posterior_metropolis, projection='3d')
For the Gibbs algorithm, consider the following values:
a=2
b=3

z1=11
N1=14
z2=7
N2=14

prior = lambda theta1, theta2: stats.beta(a, b).pdf(theta1) * stats.beta(a, b).pdf(theta2)
lik = partial(bern2, z1=z1, z2=z2, N1=N1, N2=N2)
target = lambda theta1, theta2: prior(theta1, theta2) * lik(theta1, theta2)

theta = np.array([0.5, 0.5])
niters = 10000
burnin = 500
sigma = np.diag([0.2, 0.2])

thetas = np.zeros((niters-burnin, 2))
for i in range(niters):
	theta = [stats.beta(a+z1, b+N1-z1).rvs(), theta[1]]
	theta = [theta[0], stats.beta(a+z2, b+N2-z2).rvs()]
	if i >= burnin:
		thetas[i-burnin] = theta
kde = stats.gaussian_kde(thetas.T)
XY = np.vstack([X.ravel(), Y.ravel()])
posterior_gibbs = kde(XY).reshape(X.shape)
make_plots(X, Y, prior(X, Y), lik(X, Y), posterior_gibbs)
make_plots(X, Y, prior(X, Y), lik(X, Y), posterior_gibbs, projection='3d')

"""***Author's note: This post organizes the content of Chapter 2 of "Mastering Natural Language Processing with Python". The summary is fairly detailed, and every piece of code from the book is included. I hope it helps those who are reading this book. FIGHTING... (Criticism, corrections, and discussion are warmly welcome.)
(you cannot find peace by avoiding life.)
***"""
