文章目录
- 视频出处
- numpy
- pandas
- matplotlib
视频出处
numpy
import numpy as np
# Build a 2x3 integer array and report its basic attributes.
array = np.array([[1, 2, 3],
                  [2, 3, 4]])
print(array)
# ndim and size are ints and shape is a tuple, so they are passed as separate
# print() arguments; concatenating them onto a str with "+" raises TypeError.
print('number of dimensions', array.ndim)
print('shape', array.shape)
print('size', array.size)
"""
[[1 2 3]
[2 3 4]]
number of dimensions 2
shape (2, 3)
size 6
进程已结束,退出代码0
"""
import numpy as np
# Different ways of creating arrays; every assignment replaces the previous one.
a = np.array([2, 23, 4], dtype=np.int32)  # element type must be spelled np.int32
a = np.zeros((3, 4))                      # 3x4 array filled with zeros
a = np.empty((3, 4))                      # 3x4 array with uninitialised contents
a = np.arange(10, 20, 2)                  # 10, 12, 14, 16, 18
a = np.arange(12).reshape((3, 4))
# The reshaped range above is:
# [[ 0  1  2  3]
#  [ 4  5  6  7]
#  [ 8  9 10 11]]
a = np.linspace(1, 10, 5)                 # 5 evenly spaced points from 1 to 10
# A (3, 4) reshape needs exactly 12 elements, so 12 points are generated here.
a = np.linspace(1, 10, 12).reshape((3, 4))
import numpy as np
# Tour of common reductions and reshaping helpers on a 3x4 array holding 2..13.
a = np.arange(2, 14).reshape(3, 4)
_demo_values = (
    a,
    np.argmin(a),          # flat index of the smallest element
    np.argmax(a),          # flat index of the largest element
    a.mean(),              # mean via the array method
    np.mean(a),            # mean via the module-level function
    np.cumsum(a),          # running total over the flattened array
    np.diff(a),            # differences between neighbours along the last axis
    np.nonzero(a),         # one index array per axis for the non-zero entries
    np.sort(a),            # sorted along the last axis
    a.T,                   # transpose via attribute
    np.transpose(a),       # transpose via function
    a.T.dot(a),            # (4x3) by (3x4) matrix product
    np.clip(a, 5, 9),      # values limited to the range [5, 9]
    np.mean(a, axis=0),    # mean of every column
    np.mean(a, axis=1),    # mean of every row
)
for _value in _demo_values:
    print(_value)
print("\n\n")
# Indexing, slicing and element-wise iteration over a 3x4 array holding 3..14.
A = np.arange(3, 15).reshape((3, 4))
print(A)
print(A[0][0], A[0, 0])  # chained and tuple indexing read the same element
print(A[:, 2])           # third column
print(A.flatten())       # 1-D copy of the data
print(A.flat)            # the flat iterator object itself
# The loop body has to be indented; every element is printed on one line and
# the trailing print() terminates that line.
for item in A.flat:
    print(item, end=" ")
print()
# Stacking 1-D arrays and turning a 1-D array into a row or a column.
A = np.array([1, 1, 1])
B = np.array([2, 2, 2])
C = np.vstack((A, B))  # stacked as rows
D = np.hstack((A, B))  # joined end to end
print(A.shape, C.shape, D.shape)
print(A, A.shape)
# A new leading axis gives a (1, 3) row. It is kept under its own name so the
# column example below still starts from the 1-D array; rebinding A here would
# make the next step produce shape (1, 1, 3) instead of (3, 1).
A_row = A[np.newaxis, :]
print(A_row, A_row.shape)
A = A[:, np.newaxis]
print(A, A.shape)
# At this point print(A, A.shape) shows:
# [[1]
#  [1]
#  [1]] (3, 1)
# Joining along the default axis 0 (B is still the array bound earlier in the script).
A = np.array([1, 1, 1])
C = np.concatenate([A, B])
print("C=", C)
# Reshaped into (3, 1) columns, the two arrays are placed side by side along axis 1.
A = np.array([1, 1, 1]).reshape(-1, 1)
B = np.array([2, 2, 2]).reshape(-1, 1)
C = np.concatenate([A, B], axis=1)
print("C=", C)
print("\n\n\n")
# Splitting a 3x4 array holding 1..12 into pieces.
A = np.arange(1, 13).reshape(3, 4)
B = np.split(A, indices_or_sections=2, axis=1)        # two equal column blocks
print(B)
B = np.array_split(A, indices_or_sections=3, axis=1)  # uneven split is allowed here
print(B)
print("A.shape=", A.shape)
print(np.split(A, 3, axis=0))  # row-wise split into three parts
print(np.split(A, 4, axis=1))  # column-wise split into four parts
# Plain assignment only creates another name for the same array;
# .copy() produces an independent array.
A = np.arange(4).astype(float)
print(A)
D = B = A      # both names refer to the very same object as A
B = A.copy()   # B now owns its own data, D still aliases A
pandas
import pandas as pd
import numpy as np
# A Series with a missing value, and a random 6x4 DataFrame indexed by dates.
s = pd.Series([1, 3, 6, np.nan, 44, 1])
print(s)
dates = pd.date_range(start='20160101', periods=6)
print(dates)
df = pd.DataFrame(data=np.random.randn(6, 4), index=dates, columns=list('abcd'))
print(df)
print(df.loc['2016-01-01', 'a'])  # single cell selected by row label and column name
# Row labels, column labels and summary statistics, in that order.
for _summary in (df.index, df.columns, df.describe()):
    print(_summary)
# Selecting and assigning data in a 6x4 integer DataFrame indexed by dates.
date = pd.date_range('20130101', periods=6)
df = pd.DataFrame(np.arange(24).reshape(6, 4), index=date, columns=['A', 'B', 'C', 'D'])
print("原数据为:\n------------------\n", df, "\n---------------------------\n")
# Column access by key and by attribute.
print("df['A']= \n", df['A'], "\ndf.A= ", df.A)
# Row slicing by position and by label.
print("df[0:3]=\n", df[0:3])
print("df['20130102':'20130104']=\n", df['20130102':'20130104'])
# Label-based selection with .loc.
print(df.loc['20130102'])
print(df.loc[:, 'A'])
print(df.loc[:, ['A', 'B']])
print(df.loc['20130102', ['A', 'B']])
# Position-based selection with .iloc.
print(df.iloc[1])
print(df.iloc[3, 1])
print(df.iloc[2:4, 1:3])
print(df.iloc[[1, 3, 5], 1:3])
print(df)
# Boolean filtering: rows whose A value is greater than 8.
print(df[df.A > 8])
# Assigning single cells by position and by label.
df.iloc[1, 1] = 11111
df.loc['20130103', 'C'] = 465468
print(df)
df[df > 9] = 0
# NOTE: df[df > 9] = 0 is NOT equivalent to df[df.A > 9] = 0. The former zeroes
# only the individual cells greater than 9; the latter zeroes every column of
# the rows in which A is greater than 9.
# .loc is used instead of the chained form df.A[df.A > 9] = 0, which may write
# to a temporary copy rather than to df.
df.loc[df.A > 9, 'A'] = 0
# Adding columns: a constant NaN column and one aligned on the existing index.
df['F'] = np.nan
df['E'] = pd.Series([1, 2, 3, 4, 5, 6], index=df.index)
print(df)
# Handling missing values; `date` is the DatetimeIndex built earlier in the script.
df = pd.DataFrame(np.arange(24).reshape(6, 4), index=date, columns=['A', 'B', 'C', 'D'], dtype=int)
df.iloc[0, 1] = np.nan
df.iloc[1, 2] = np.nan
print(df)
print(df.dropna(axis=0, how='any'))  # drop every row that contains a NaN
print(df.fillna(value=0))            # replace NaN with 0
print(df.isnull())                   # boolean mask of the missing cells
# Does the frame contain any missing value at all?  Reducing the boolean
# mask directly avoids the redundant "== True" comparison.
print(df.isnull().values.any())
# Load a CSV file from a fixed local Windows path and display it.
student_csv = r'E:\EdgeDownloads\student.csv'
data = pd.read_csv(student_csv)
print(data)
# Three constant 3x4 frames (all 0, all 1, all 2) sharing the same columns.
df1 = pd.DataFrame(np.full((3, 4), 0.0), columns=list('abcd'))
df2 = pd.DataFrame(np.full((3, 4), 1.0), columns=list('abcd'))
df3 = pd.DataFrame(np.full((3, 4), 2.0), columns=list('abcd'))
print(df1, df2, df3, sep=' \n ')
# Vertical concatenation with a fresh 0..n-1 index.
res1 = pd.concat([df1, df2, df3], ignore_index=True, axis=0)
print("上下合并df1 df2的结果:\n", res1)
# Horizontal concatenation, aligned on the row index.
res2 = pd.concat([df1, df2, df3], axis=1)
print("左右合并df1 df2的结果:\n", res2)
# Frames with partly different columns: join='inner' keeps only the shared ones.
df1 = pd.DataFrame(np.full((3, 4), 0.0), columns=list('abcd'), index=[1, 2, 3])
df2 = pd.DataFrame(np.full((3, 4), 1.0), columns=list('bcde'), index=[2, 3, 4])
res = pd.concat([df1, df2], ignore_index=True, join='inner')
print(res)
# Appending rows. DataFrame.append was removed in pandas 2.0, so every append
# is expressed with pd.concat, which works on old and new pandas alike.
df1 = pd.DataFrame(np.ones((3, 4))*0, columns=['a', 'b', 'c', 'd'])
df2 = pd.DataFrame(np.ones((3, 4))*1, columns=['a', 'b', 'c', 'd'])
df3 = pd.DataFrame(np.ones((3, 4))*1, columns=['b', 'c', 'd', 'e'], index=[2, 3, 4])
# Append one frame and renumber the rows.
res = pd.concat([df1, df2], ignore_index=True)
print(res)
# Append several frames at once, keeping the original row labels.
res = pd.concat([df1, df2, df3])
print(res)
df1 = pd.DataFrame(np.ones((3, 4))*0, columns=['a', 'b', 'c', 'd'])
s1 = pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])
# A Series is appended as a single row: turn it into a one-row frame first.
res = pd.concat([df1, s1.to_frame().T], ignore_index=True)
print("df1=\n", df1, "\ns1=\n", s1, "\nres=\n", res)
# Join two frames on the shared 'key' column.
left = pd.DataFrame(dict(key=['K0', 'K1', 'K2', 'K3'],
                         A=['A0', 'A1', 'A2', 'A3'],
                         B=['B0', 'B1', 'B2', 'B3']))
right = pd.DataFrame(dict(key=['K0', 'K1', 'K2', 'K3'],
                          C=['C0', 'C1', 'C2', 'C3'],
                          D=['D0', 'D1', 'D2', 'D3']))
for _frame in (left, right):
    print(_frame)
res = left.merge(right, on='key')
print(res)
# Join on the pair of columns (key1, key2).
left = pd.DataFrame(dict(key1=['K0', 'K0', 'K1', 'K2'],
                         key2=['K0', 'K1', 'K0', 'K1'],
                         A=['A0', 'A1', 'A2', 'A3'],
                         B=['B0', 'B1', 'B2', 'B3']))
right = pd.DataFrame(dict(key1=['K0', 'K1', 'K1', 'K2'],
                          key2=['K0', 'K0', 'K0', 'K0'],
                          C=['C0', 'C1', 'C2', 'C3'],
                          D=['D0', 'D1', 'D2', 'D3']))
for _frame in (left, right):
    print(_frame)
_join_keys = ['key1', 'key2']
# The inner join is computed first and then replaced by the left join,
# so only the left-join result is printed.
res = left.merge(right, on=_join_keys, how='inner')
res = left.merge(right, on=_join_keys, how='left')
print(res)
"""
key1 key2 A B
0 K0 K0 A0 B0
1 K0 K1 A1 B1
2 K1 K0 A2 B2
3 K2 K1 A3 B3
key1 key2 C D
0 K0 K0 C0 D0
1 K1 K0 C1 D1
2 K1 K0 C2 D2
3 K2 K0 C3 D3
key1 key2 A B C D
0 K0 K0 A0 B0 C0 D0
1 K0 K1 A1 B1 NaN NaN
2 K1 K0 A2 B2 C1 D1
3 K1 K0 A2 B2 C2 D2
4 K2 K1 A3 B3 NaN NaN
"""
# Outer join with an indicator column recording which side each row came from.
df1 = pd.DataFrame(dict(col1=[0, 1], col_left=['a', 'b']))
df2 = pd.DataFrame(dict(col1=[1, 2, 2], col_right=[2, 2, 2]))
for _frame in (df1, df2):
    print(_frame)
# indicator=True uses the default column name; a string picks a custom name.
for _indicator in (True, 'indicator_column'):
    res = df1.merge(df2, on='col1', how='outer', indicator=_indicator)
    print(res)
# Join on the row labels instead of on a key column.
left = pd.DataFrame(dict(A=['A0', 'A1', 'A2'],
                         B=['B0', 'B1', 'B2']),
                    index=['K0', 'K1', 'K2'])
right = pd.DataFrame(dict(C=['C0', 'C2', 'C3'],
                          D=['D0', 'D2', 'D3']),
                     index=['K0', 'K2', 'K3'])
for _frame in (left, right):
    print(_frame)
res = left.merge(right, how='outer', left_index=True, right_index=True)
print(res)
# Printed result of the outer join (labels from either side are kept):
#       A    B    C    D
# K0   A0   B0   C0   D0
# K1   A1   B1  NaN  NaN
# K2   A2   B2   C2   D2
# K3  NaN  NaN   C3   D3
res = left.merge(right, how='inner', left_index=True, right_index=True)
print(res)
"""
A B C D
K0 A0 B0 C0 D0
K2 A2 B2 C2 D2
"""
# Both frames have an 'age' column; the suffixes keep the two apart after the join.
boys = pd.DataFrame(dict(k=['K0', 'K1', 'K2'], age=[1, 2, 3]))
girls = pd.DataFrame(dict(k=['K0', 'K0', 'K3'], age=[4, 5, 6]))
res = boys.merge(girls, on='k', how='inner', suffixes=['_boy', '_girl'])
print(res)
"""
k age_boy age_girl
0 K0 1 4
1 K0 1 5
"""
import matplotlib.pyplot as plt
# Line plot of a cumulative sum over 1000 random values.
data = pd.Series(np.random.randn(1000), index=np.arange(1000)).cumsum()
data.plot()
plt.show()
# The same for a frame with four random columns A-D (one line per column).
data = pd.DataFrame(
    np.random.randn(1000, 4),
    index=np.arange(1000),
    columns=list("ABCD"),
).cumsum()
data.plot()
# plot methods:
# 'bar', 'hist', 'box', 'kde', 'area', 'scatter', 'hexbin', 'pie'
# Two scatter series drawn on the same axes: the first call creates the axes,
# the second one reuses them through ax=.
ax = data.plot.scatter(x='A', y='B', color='DarkBlue', label='Class 1')
data.plot.scatter(x='A', y='C', color='DarkGreen', label='Class 2', ax=ax)
plt.show()
matplotlib
import matplotlib.pyplot as plt
import numpy as np
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.gridspec as gridspec
from matplotlib import animation
# Straight line y = 2x + 1 sampled at 50 points of [-1, 1].
x = np.linspace(start=-1, stop=1, num=50)
y = 2 * x + 1
plt.plot(x, y)
plt.show()
# Two curves over [-3, 3]: a line and a parabola.
x = np.linspace(start=-3, stop=3, num=50)
y1 = 2 * x + 1
y2 = x ** 2
# First figure: the line only.
plt.figure()
plt.plot(x, y1)
# Second figure (number 3, 20x10 inches): both curves with explicit styles.
plt.figure(num=3, figsize=(20, 10))
plt.plot(x, y2, color='red', linewidth=1.0, linestyle='--')
plt.plot(x, y1, color='green', linewidth=2.0, linestyle='dotted')
# Visible range and axis captions.
plt.xlim(-1, 2)
plt.ylim(-2, 3)
plt.xlabel('I am x', fontsize=20)
plt.ylabel('I am y', fontsize=20)
# Five evenly spaced x ticks; the y ticks get text labels instead of numbers.
new_ticks = np.linspace(-1, 2, 5)
print(new_ticks)
plt.xticks(new_ticks)
plt.yticks(
    [-2, -1.8, -1, 1.2, 2],
    [r'$really\ bad$', '$bad$', r'$normal$', r'$good$', r'$very\ good$'],
)
# Hide the right/top frame lines and move the remaining two through the origin.
ax = plt.gca()
for _side in ('right', 'top'):
    ax.spines[_side].set_color('none')
ax.xaxis.set_ticks_position('bottom')
ax.yaxis.set_ticks_position('left')
for _side in ('bottom', 'left'):
    ax.spines[_side].set_position(('data', 0))
# Legend, point annotation and free text on a new figure (x, y1 and y2 are the
# arrays defined earlier in the script).
plt.figure()
# plt.plot returns a list of lines; the trailing comma unpacks the single line.
l1, = plt.plot(x, y1, label='up')
l2, = plt.plot(x, y2, color='red', linewidth=1, linestyle='--', label='down')
# The labels passed here are the ones shown in the legend.
plt.legend(handles=[l1, l2], labels=['y1', 'y2'], loc='best')
# Mark the point (x0, y0) on the line y = 2x + 1 and drop a dashed vertical line from it.
x0 = 1
y0 = 2*x0+1
plt.scatter(x0, y0, s=50, color='b')
plt.plot([x0, x0], [y0, -6], 'k--', lw=2)
# annotate() adds a note to the figure:
#   - '%s' in the text is a placeholder; "% y0" substitutes the value of y0
#   - xy=(x0, y0) is the point being annotated (where the arrow ends)
#   - xycoords='data' means xy is given in data coordinates; other accepted
#     values include 'figure fraction', 'axes fraction' and 'offset points'
#   - xytext=(+30, -30) places the text 30 points to the right of and
#     30 points below xy
#   - textcoords='offset points' means xytext is an offset (in points) from xy
#   - arrowprops sets the arrow style: arrowstyle='->' draws an arrow from the
#     text to the annotated point, connectionstyle='arc3,rad=.2' bends the
#     connecting line into an arc
plt.annotate(r'$2x+1=%s$' % y0, xy=(x0, y0), xycoords='data', xytext=(+30, -30), textcoords='offset points',
             fontsize=16, arrowprops=dict(arrowstyle='->', connectionstyle='arc3,rad=.2'))
# The colour must be exactly 'g'; a trailing space ('g ') is not a valid colour name.
plt.text(-3.7, 3, r'$This\ is\ some\ text,\ \mu\ \sigma_i\ \alpha^t$',
         fontdict={'size': 16, 'color': 'g'})
plt.show()
# A thick line that would hide the tick labels; the labels get a translucent
# white background so they stay readable.
x = np.linspace(-3, 3, 50)
y = 0.1*x
plt.figure()
plt.plot(x, y, linewidth=10, zorder=1)
plt.ylim(-2, 2)
ax = plt.gca()
# Hide the right/top frame lines and move the other two through the origin.
ax.spines['right'].set_color('none')
ax.spines['top'].set_color('none')
ax.xaxis.set_ticks_position('bottom')
ax.spines['bottom'].set_position(('data', 0))
ax.yaxis.set_ticks_position('left')
ax.spines['left'].set_position(('data', 0))
# Both statements belong to the loop body: each tick label is resized and
# given its own background box.
for label in ax.get_xticklabels() + ax.get_yticklabels():
    label.set_fontsize(12)
    label.set_bbox(dict(facecolor='white', edgecolor='None', alpha=0.7))
plt.show()
# Scatter plot of 1024 normally distributed points, coloured by their angle.
n = 1024
X = np.random.normal(loc=0, scale=1, size=n)
Y = np.random.normal(loc=0, scale=1, size=n)
T = np.arctan2(Y, X)  # angle of each point, used as the colour value
plt.scatter(X, Y, c=T, alpha=0.5)
# Same visible range on both axes.
for _set_limits in (plt.xlim, plt.ylim):
    _set_limits((-1.5, 1.5))
# Empty tick sequences remove all ticks.
for _set_ticks in (plt.xticks, plt.yticks):
    _set_ticks(())
plt.show()
# Mirrored bar chart: Y1 is drawn upwards, Y2 downwards, each bar labelled with its value.
n = 12
X = np.arange(n)
Y1 = (1 - X/float(n)) * np.random.uniform(0.5, 1.0, n)
Y2 = (1 - X/float(n)) * np.random.uniform(0.5, 1.0, n)
plt.bar(X, Y1, facecolor='#9999ff', edgecolor='white')
plt.bar(X, -Y2, facecolor='#ff9999', edgecolor='white')
# Hide all four frame lines.
ax = plt.gca()
ax.spines['right'].set_color('none')
ax.spines['top'].set_color('none')
ax.spines['bottom'].set_color('none')
ax.spines['left'].set_color('none')
# zip() pairs up the elements of several iterables so that they can be
# consumed together; here each bar position is paired with its height.
# The plt.text calls are the (indented) loop bodies.
for x, y in zip(X, Y1):
    # value just above the upward bar
    plt.text(x, y+0.05, '%.2f' % y, ha='center', va='bottom')
for x, y in zip(X, Y2):
    # value just below the downward bar
    plt.text(x, -y-0.05, '%.2f' % y, ha='center', va='top')
plt.xlim(-.5, n)
plt.xticks(())
plt.ylim(-1.25, 1.25)
plt.yticks(())
plt.show()
def f(x, y):
    """Return (1 - x/2 + x**5 + y**3) * exp(-x**2 - y**2).

    The expression is evaluated with ``np.exp``, so *x* and *y* may be
    scalars or NumPy arrays; arrays are combined element-wise.
    """
    return (1 - x / 2 + x**5 + y**3) * np.exp(-x**2 - y**2)
# Filled contour plot of f over a 256x256 grid on [-3, 3] x [-3, 3],
# with labelled black contour lines drawn on top.
n = 256
x = np.linspace(start=-3, stop=3, num=n)
y = np.linspace(start=-3, stop=3, num=n)
X, Y = np.meshgrid(x, y)
Z = f(X, Y)
plt.contourf(X, Y, Z, 8, cmap=plt.cm.rainbow, alpha=0.8)
C = plt.contour(X, Y, Z, 8, colors='black')
plt.clabel(C, fontsize=10, inline=True)
# Empty tick sequences remove all ticks.
for _set_ticks in (plt.xticks, plt.yticks):
    _set_ticks(())
plt.show()
# A symmetric 3x3 matrix shown as an image with a colour bar.
a = np.array([
    [0.313660827978, 0.365348418405, 0.423733120134],
    [0.365348418405, 0.439599930621, 0.525083754405],
    [0.423733120134, 0.525083754405, 0.651536351379],
])
# for the value of "interpolation", check this:
# http://matplotlib.org/examples/images_contours_and_fields/interpolation_methods.html
# for the value of "origin"= ['upper', 'lower'], check this:
# http://matplotlib.org/examples/pylab_examples/image_origin.html
plt.imshow(a, cmap='bone', interpolation='nearest', origin='lower')
plt.colorbar(shrink=0.5)
# Empty tick sequences remove all ticks.
for _set_ticks in (plt.xticks, plt.yticks):
    _set_ticks(())
plt.show()
# 3-D surface of sin(sqrt(x^2 + y^2)) with filled contours drawn at z = -2.
fig = plt.figure()
ax = Axes3D(fig, auto_add_to_figure=False)
fig.add_axes(ax)  # the axes are attached to the figure explicitly
# Grid over [-4, 4) in steps of 0.25 on both axes.
x = np.arange(-4, 4, 0.25)
y = np.arange(-4, 4, 0.25)
X, Y = np.meshgrid(x, y)
R = np.sqrt(X**2 + Y**2)  # distance of every grid point from the origin
Z = np.sin(R)
ax.plot_surface(
    X, Y, Z,
    rstride=1,
    cstride=1,
    cmap=plt.get_cmap('rainbow'),
)
ax.contourf(X, Y, Z, zdir='z', offset=-2, cmap='rainbow')
ax.set_zlim3d(-2, 2)
plt.show()
# One wide plot on top (2x1 grid, slot 1) and three small ones below
# (2x3 grid, slots 4-6), each showing a single line segment.
plt.figure()
_panels = (
    ((2, 1, 1), [0, 1], [0, 1]),
    ((2, 3, 4), [0, 1], [1, 0]),
    ((2, 3, 5), [-1, 0], [0, 1]),
    ((2, 3, 6), [-1, 0], [0, -1]),
)
for _slot, _xs, _ys in _panels:
    plt.subplot(*_slot)
    plt.plot(_xs, _ys)
plt.show()
# Five axes laid out on a 3x3 grid with subplot2grid.
plt.figure()
_grid = (3, 3)
ax1 = plt.subplot2grid(_grid, (0, 0), rowspan=1, colspan=3)  # full top row
ax1.plot([1, 2], [1, 2])
ax1.set_title('ax1 title')
ax2 = plt.subplot2grid(_grid, (1, 0), colspan=2)  # middle row, first two columns
ax3 = plt.subplot2grid(_grid, (1, 2), rowspan=2)  # last column, rows 1-2
ax4 = plt.subplot2grid(_grid, (2, 0))             # bottom left cell
ax5 = plt.subplot2grid(_grid, (2, 1))             # bottom middle cell
plt.show()
# The same kind of layout expressed by slicing a 3x3 GridSpec.
plt.figure()
gs = gridspec.GridSpec(nrows=3, ncols=3)
ax1 = plt.subplot(gs[0, :])   # full top row
ax1.plot([0, 1], [0, 1])
ax1.set_title('ax1_title')
ax2 = plt.subplot(gs[1, :2])  # middle row, first two columns
ax3 = plt.subplot(gs[1:, 2])  # last column, rows 1-2
ax4 = plt.subplot(gs[2, 0])   # bottom left cell
ax5 = plt.subplot(gs[2, 1])   # bottom middle cell
plt.show()
# plt.subplots(2, 2) returns the figure and a 2x2 array of axes, unpacked here
# row by row.  The figure is bound to `fig` rather than `f` so that the helper
# function f defined earlier in the script is not overwritten.
fig, ((ax11, ax12), (ax21, ax22)) = plt.subplots(2, 2, sharex=True, sharey=True)
# A main plot with two smaller plots placed inside it.
fig = plt.figure()
x = [1, 2, 3, 4, 5, 6, 7]
y = [1, 3, 4, 2, 5, 8, 6]
# Axes rectangles are given as fractions of the figure, each in [0, 1]:
#   left:   position of the axes' left edge, measured from the figure's left side
#   bottom: position of the axes' bottom edge, measured from the figure's bottom
#   width:  width of the axes as a fraction of the figure width
#   height: height of the axes as a fraction of the figure height
# Main axes covering most of the figure.
left, bottom, width, height = 0.1, 0.1, .8, .8
ax1 = fig.add_axes([left, bottom, width, height])
ax1.plot(x, y, 'r')
ax1.set(xlabel='x', ylabel='y', title='title')
# First inset, placed in the upper left part.
left, bottom, width, height = 0.2, 0.6, 0.25, 0.25
ax2 = fig.add_axes([left, bottom, width, height])
ax2.plot(x, y, 'b')
ax2.set(xlabel='x', ylabel='y', title='title inside 1')
# Second inset created through the pyplot interface, plotting reversed y against x.
plt.axes([.6, .2, .25, .25])
plt.plot(list(reversed(y)), x, 'g')
plt.xlabel('x')
plt.ylabel('y')
plt.title('title inside 2')
plt.show()
# Two curves sharing the x axis but each with its own y axis.
x = np.arange(0, 10, .1)
y1 = 0.05 * x**2
y2 = -y1
fig, ax1 = plt.subplots()
ax2 = ax1.twinx()  # second axes sharing ax1's x axis
for _axes, _ys, _fmt in ((ax1, y1, 'g-'), (ax2, y2, 'b--')):
    _axes.plot(x, _ys, _fmt)
ax1.set_xlabel('X data')
# Each y label is coloured like the curve drawn on its axes.
for _axes, _text, _colour in ((ax1, 'Y1', 'g'), (ax2, 'Y2', 'b')):
    _axes.set_ylabel(_text, color=_colour)
plt.show()
"""
主次坐标轴案例
import matplotlib.pyplot as plt
# 创建主坐标轴
fig, ax1 = plt.subplots()
# 绘制主坐标轴的数据
x = [1, 2, 3, 4, 5]
y1 = [10, 20, 15, 25, 30]
ax1.plot(x, y1, 'b-', label='Primary Axis')
# 设置主坐标轴标签和标题
ax1.set_xlabel('X-axis')
ax1.set_ylabel('Primary Y-axis')
ax1.set_title('Primary and Secondary Axes')
# 创建次坐标轴
ax2 = ax1.twinx()
# 绘制次坐标轴的数据
y2 = [0.1, 0.2, 0.15, 0.25, 0.3]
ax2.plot(x, y2, 'r--', label='Secondary Axis')
# 设置次坐标轴标签
ax2.set_ylabel('Secondary Y-axis')
# 显示图例
ax1.legend(loc='upper left')
ax2.legend(loc='upper right')
# 显示图形
plt.show()
"""
"""
plt.subplots()返回一个包含图形对象和子图对象的元组 (fig, ax)。
fig 是图形对象,可以用于设置图形的整体属性,例如标题、尺寸等。
ax 是一个子图对象或子图对象数组,可以用于绘制和操作子图。
plt.subplots() 函数的常用参数包括:
nrows:子图的行数,默认为 1。
ncols:子图的列数,默认为 1。
sharex:如果为 True,则所有子图共享相同的 x 轴,默认为 False。
sharey:如果为 True,则所有子图共享相同的 y 轴,默认为 False。
squeeze:如果为 True,则当只有一个子图时,返回的子图对象不是数组,而是单个对象,默认为 True。
subplot_kw:用于传递给 add_subplot() 方法的关键字参数的字典。
gridspec_kw:用于传递给 GridSpec 构造函数的关键字参数的字典。
"""
# Figure with a single axes showing sin(x), sampled every 0.01 on [0, 2*pi).
fig, ax = plt.subplots()
x = np.arange(0, 2 * np.pi, 0.01)
# ax.plot returns a list holding the one line that was drawn.
line = ax.plot(x, np.sin(x))[0]
def animate(i):
    """Update the curve for frame *i*.

    Sets the y-data of the module-level ``line`` to ``sin(x + i / 10)``
    (``x`` is the module-level sample array) and returns the line in a
    one-element tuple.
    """
    line.set_ydata(np.sin(x + i / 10))
    return line,
def init():
    """Reset the curve to its starting shape.

    Sets the y-data of the module-level ``line`` back to ``sin(x)`` and
    returns the line in a one-element tuple.
    """
    line.set_ydata(np.sin(x))
    return line,
# Animate 100 frames, 20 ms apart; the result is kept in `ani` while the
# window is shown.
ani = animation.FuncAnimation(
    fig=fig,
    func=animate,
    init_func=init,
    frames=100,
    interval=20,
    blit=True,
)
plt.show()