python常用包学习(一)——Matplotlib

笔者是一个痴迷于挖掘数据中的价值的学习人,希望在平日的工作学习中,挖掘数据的价值,找寻数据的秘密。笔者认为,数据的价值不仅仅体现在企业中,个人也可以体会到数据的魅力:用技术力量探索行为密码,让大数据助跑每一个人。欢迎同学们关注我的公众号,大家一起讨论数据中的那些有趣的事情。

我的公众号为:livandata

Matplotlib的应用主要是对数据进行可视化,对数据按照图表来呈现,以方便找寻数据的规律,查找可用的方法、算法:

本文主要是对matplotlib的常规方法做一下介绍,matplotlib的复杂应用以及三维数据的可视化,有机会再做补充,此文可以作为入门知识来使用:

# -*- coding: utf-8 -*-

import numpy as np
import pandas as pd
from pandas import DataFrame
from pandas import Series
from sklearn.cross_validation import train_test_split
import matplotlib.pyplot as plt
from sklearn.linear_model.logistic import LogisticRegression
from pylab import *
from scipy.constants.constants import alpha
# predict_proba:预测的应用:
# x_train = np.array([[1,2,3],[1,3,4],[2,1,2],[4,5,6],[3,5,3],[1,7,2]])
# y_train = np.array([3,3,3,2,2,2])
# x_test = np.array([[2,2,2],[3,2,6],[1,7,4]])
# clf=LogisticRegression()
# clf.fit(x_train, y_train)
# print(clf.predict(x_test))
# print(clf.predict_proba(x_test))
# predict_proba可以计算每个样本属于各个分类的概率:每个概率值都在[0,1]之间,每一行的概率之和为1:
# 创建一个图:
# 折线图:
# squares = [1,4,9,16,25]
# input_values = [1,2,3,4,5]
# plt.plot(input_values, squares, linewidth=5)
# plt.title('Square Number', fontsize=24)
# plt.xlabel('value', fontsize=14)
# plt.ylabel('Square of Value', fontsize=14)
# plt.tick_params(axis='both', labelsize=14)
# plt.show()
# 散点图:
# x_values = [1,2,3,4,5]
# y_values = [1,4,9,16,25]
# plt.scatter(x_values,y_values,s=100)
# plt.title('Square Numbers', fontsize=24)
# plt.xlabel('Value', fontsize=14)
# plt.ylabel('Square of Value', fontsize=14)
# plt.tick_params(axis='both', labelsize=14)
# plt.show()
# 自动计算数据:
# x_values = list(range(1, 1001))
# y_values = [x**2 for x in x_values]
# plt.scatter(x_values, y_values, s=1, edgecolors='none', cmap=plt.cm.Blues)
# plt.title('Square Number', fontsize=24)
# plt.xlabel('Value', fontsize=14)
# plt.ylabel('Square of Value', fontsize=14)
# plt.axis([0, 1100, 0, 1100000])
# plt.tick_params(axis='both', labelsize=14)
# plt.show()
# 随机漫步:
# class RandWalk(object):
#     def __init__(self, count=5000):
#         self.count = count
#         self.x_list = [0]
#         self.y_list = [0]
#     def fill_walk(self):
#         while len(self.x_list) < self.count:
#             x_step = self.fill_step()
#             y_step = self.fill_step()
#             if x_step==0 or y_step==0:
#                 continue
#             next_x = self.x_list[-1] + x_step
#             next_y = self.y_list[-1] + y_step
#             self.x_list.append(next_x)
#             self.y_list.append(next_y)
#     def fill_step(self):
#         direction = choice([-1, 1])
#         distance = choice([2,4,6,8,10])
#         return direction*distance
# rw = RandWalk()
# rw.fill_walk()
# point_numbers = list(range(rw.count))
# plt.figure(dpi=128, figsize=(10,6))
# plt.scatter(rw.x_list, rw.y_list, c=point_numbers, cmap=plt.cm.Blues, s=1)
# plt.scatter(0, 0, c='green', edgecolors='none', s=10)
# plt.scatter(rw.x_list[-1], rw.y_list[-1], c='red', edgecolors='none', s=10)
# plt.axes().get_xaxis().set_visible(False)
# plt.axes().get_yaxis().set_visible(False)
# plt.show()
# 其他图形为:
# bar(x,y,0.5,color='r'):柱状图(第三个参数为柱宽;bar不支持marker参数);
# hist(data, 40, density=True, histtype='bar', facecolor='yellowgreen', alpha=0.75):直方图(新版Matplotlib已移除normed参数,改用density=True)
# 设置x,y的坐标值:xlim(-2.5, 2.5);ylim(-1, 1)
# 显示中文和负号代码如下:
# plt.rcParams['font.sans-serif']=['SimHei']:显示中文标签;
# plt.rcParams['axes.unicode_minus']=False:用来正常显示负号;
# 创建子图:
# x=[1,2,3,4]
# y=[3,5,10,25]
# fig = plt.figure()
# ax1 = fig.add_subplot(231)
# plt.plot(x,y,marker='D')
# plt.sca(ax1)
# ax2 = fig.add_subplot(232)
# plt.scatter(x,y,marker='s',color='r')
# plt.sca(ax2)
# plt.grid(True)
# ax3 = fig.add_subplot(233)
# plt.bar(x,y,0.5,color='c')
# plt.sca(ax3)
# ax4 = fig.add_subplot(234)
# mean=0
# sigma=1
# data=mean+sigma*np.random.randn(10000)
# plt.hist(data,40,density=True,histtype='bar',facecolor='yellowgreen', alpha=0.75)
# plt.sca(ax4)
# m = np.arange(-5.0, 5.0, 0.02)
# n = np.sin(m)
# ax5 = fig.add_subplot(235)
# plt.plot(m,n)
# plt.sca(ax5)
# ax6 = fig.add_subplot(236)
# xlim(-2.5, 2.5)
# ylim(-1,1)
# plt.plot(m,n)
# plt.sca(ax6)
# plt.grid(True)
# plt.show()
# 热图:
# x=[[1,2],[3,4],[5,6]]
# fig = plt.figure()
# ax = fig.add_subplot(231)
# ax.imshow(x)
# ax = fig.add_subplot(232)
# im = ax.imshow(x, cmap=plt.cm.gray)
# ax = fig.add_subplot(233)
# im = ax.imshow(x, cmap=plt.cm.spring)
# plt.colorbar(im)
# ax = fig.add_subplot(234)
# im = ax.imshow(x, cmap=plt.cm.summer)
# plt.colorbar(im, cax=None, ax=None, shrink=0.5)
# ax = fig.add_subplot(235)
# im = ax.imshow(x, cmap=plt.cm.autumn)
# plt.colorbar(im, shrink=0.5, ticks=[-1,0,1])
# ax = fig.add_subplot(236)
# im = ax.imshow(x, cmap=plt.cm.winter)
# plt.colorbar(im, shrink=0.5)
# plt.show()
# 矩阵颜色图:
# def draw_heatmap(data,xlabels,ylabels):
#     cmap=cm.get_cmap('rainbow',1000)
#     figure=plt.figure(facecolor='w')
#     ax=figure.add_subplot(1,1,1,position=[0.1,0.15,0.8,0.8])
#     ax.set_yticks(range(len(ylabels)))
#     ax.set_yticklabels(ylabels)
#     ax.set_xticks(range(len(xlabels)))
#     ax.set_xticklabels(xlabels)
#     vmax=data[0][0]
#     vmin=data[0][0]
#     for i in data:
#         for j in i:
#             if j>vmax:
#                 vmax=j
#             if j<vmin:
#                 vmin=j
#     map=ax.imshow(data,interpolation='nearest',cmap=cmap,aspect='auto',vmin=vmin,vmax=vmax)
#     cb=plt.colorbar(mappable=map,cax=None,ax=None,shrink=0.5)
#     plt.show()
# a=np.random.rand(10,10)
# print(a)
# xlabels=['A','B','C','D','E','F','G','H','I','J']
# ylabels=['a','b','c','d','e','f','g','h','i','j']

# draw_heatmap(a,xlabels,ylabels)

在上面的案例中,用到的一些小知识为:

# predict_proba:主要是用来预测分类出现的概率;
# x_train = np.array([[1,2,3],[1,3,4],[2,1,2],[4,5,6],[3,5,3],[1,7,2]])
# y_train = np.array([3,3,3,2,2,2])
# x_test = np.array([[2,2,2],[3,2,6],[1,7,4]])
# clf=LogisticRegression()
# clf.fit(x_train, y_train)
# print(clf.predict(x_test))
# print(clf.predict_proba(x_test))
# predict_proba可以计算每个样本属于各个分类的概率:每个概率值都在[0,1]之间,每一行的概率之和为1
import itertools
# 1)笛卡儿积:itertools.product
# for i in itertools.product('ABCD', repeat=2):
#     print(''.join(i), end=' ')
# a = (1,2,3)
# b = ('A','B','C')
# c = itertools.product(a,b)
# for i in c:
#     print(i, end=' ')
# 2)排列:
# for i in itertools.permutations('ABCD', 2):
#     print(''.join(i), end=' ')
# 3)组合:
# for i in itertools.combinations('ABCD', 3):
#     print(''.join(i))
# 4)可重复组合(同一元素允许被重复选取):
# for i in itertools.combinations_with_replacement('ABCD', 3):
#     print(''.join(i), end=' ')

你可能感兴趣的:(python)