import chardet  # detects the character encoding of raw bytes
from wordcloud import WordCloud  # word-cloud generator
import matplotlib.pyplot as plt  # plotting library

# Read the file as raw bytes: chardet.detect() works on bytes, and only
# bytes objects can be decoded afterwards (a text-mode read on Python 3
# would already return str, which has no .decode()).
with open("C:\\Users\\fyc\\Desktop\\virgo.txt", "rb") as f:
    text = f.read()

# Guess the encoding of the raw content. The result is stored under a name
# that does not shadow the builtin `type`.
detected = chardet.detect(text)

# NOTE(review): chardet is documented to report encoding=None when it cannot
# make a guess (e.g. empty input); fall back to UTF-8 instead of crashing.
text1 = text.decode(detected["encoding"] or "utf-8")
# NOTE(review): this looks like an excerpt from a method body of the
# WordCloud class (it reads self.stopwords and self.regexp); it is shown for
# reference and cannot run at module level as-is.

# Lower-case every stopword so later comparisons are case-insensitive.
stopwords = set(map(str.lower, self.stopwords))

# Only on Python 2 with a unicode input is the re.UNICODE flag requested.
flags = (re.UNICODE if sys.version < '3' and type(text) is unicode
         else 0)
# Use the caller-supplied pattern when one was given, else the default
# word pattern.
regexp = self.regexp if self.regexp is not None else r"\w[\w']+"

# Split the input text into tokens.
words = re.findall(regexp, text, flags)
# Configure the word cloud: white canvas of 1000x860 pixels.
wc1 = WordCloud(
    background_color="white",
    width=1000,
    height=860,
    # Without this font setting the words are rendered as box-shaped
    # garbage characters.
    font_path="C:\\Windows\\Fonts\\STFANGSO.ttf",
    margin=2,
)

# generate() takes a unicode (decoded) string, which is why the file content
# has to be decoded before it is passed in.
wc2 = wc1.generate(text1)

# Show the generated cloud as an image with the axes hidden.
plt.imshow(wc2)
plt.axis("off")
plt.show()
class WordCloud(object) def __init__(self, font_path=None, width=400, height=200, margin=2, ranks_only=None, prefer_horizontal=.9, mask=None, scale=1, color_func=None, max_words=200, min_font_size=4, stopwords=None, random_state=None, background_color='black', max_font_size=None, font_step=1, mode="RGB", relative_scaling=.5, regexp=None, collocations=True, colormap=None, normalize_plurals=True) Documentation is missing. The following is copied from class WordCloud. Word cloud object for generating and drawing. font_path: (string) Font path to the font that will be used (OTF or TTF). Defaults to DroidSansMono path on a Linux machine. If you are on another OS or don't have this font, you need to adjust this path. width: (int (default=400)) Width of the canvas. height: (int (default=200)) Height of the canvas. prefer_horizontal: (float (default=0.90)) The ratio of times to try horizontal fitting as opposed to vertical. If prefer_horizontal < 1, the algorithm will try rotating the word if it doesn't fit. (There is currently no built-in way to get only vertical words.) mask: (nd-array or None (default=None)) If not None, gives a binary mask on where to draw words. If mask is not None, width and height will be ignored and the shape of mask will be used instead. All white (#FF or #FFFFFF) entries will be considered "masked out" while other entries will be free to draw on. [This changed in the most recent version!] scale: (float (default=1)) Scaling between computation and drawing. For large word-cloud images, using scale instead of larger canvas size is significantly faster, but might lead to a coarser fit for the words. min_font_size: (int (default=4)) Smallest font size to use. Will stop when there is no more room in this size. font_step: (int (default=1)) Step size for the font. font_step > 1 might speed up computation but give a worse fit. max_words: (number (default=200)) The maximum number of words. 
stopwords: (set of strings or None) The words that will be eliminated. If None, the built-in STOPWORDS list will be used. background_color: (color value (default="black")) Background color for the word cloud image. max_font_size: (int or None (default=None)) Maximum font size for the largest word. If None, height of the image is used. mode: (string (default="RGB")) Transparent background will be generated when mode is "RGBA" and background_color is None. relative_scaling: (float (default=.5)) Importance of relative word frequencies for font-size. With relative_scaling=0, only word-ranks are considered. With relative_scaling=1, a word that is twice as frequent will have twice the size. If you want to consider the word frequencies and not only their rank, relative_scaling around .5 often looks good. color_func: (callable, default=None) Callable with parameters word, font_size, position, orientation, font_path, random_state that returns a PIL color for each word. Overwrites "colormap". See colormap for specifying a matplotlib colormap instead. regexp: (string or None (optional)) Regular expression to split the input text into tokens in process_text. If None is specified, r"\w[\w']+" is used. collocations: (bool, default=True) Whether to include collocations (bigrams) of two words. colormap: (string or matplotlib colormap, default="viridis") Matplotlib colormap to randomly draw colors from for each word. Ignored if "color_func" is specified. normalize_plurals: (bool, default=True) Whether to remove trailing 's' from words. If True and a word appears with and without a trailing 's', the one with trailing 's' is removed and its counts are added to the version without trailing 's' – unless the word ends with 'ss'. Notes Larger canvases will make the code significantly slower. If you need a large word cloud, try a lower canvas size, and set the scale parameter. 
The algorithm might give more weight to the ranking of the words than their actual frequencies, depending on the max_font_size and the scaling heuristic.
width,height 顾名思义,画布的长宽。
prefer_horizontal :词云的字体优先水平放置
# Render the word-cloud image in a matplotlib window with the axes hidden.
plt.imshow(wc2)
plt.axis("off")
plt.show()