import pandas as pd
pd.__version__
# 就是pandas模块,对一些汉字,处理不好,版本会继续升级的
输出
‘0.22.0’
import numpy as np
from pandas import Series, DataFrame
## 1、删除重复元素 ### 使用duplicated()函数检测重复的行,返回元素为布尔类型的Series对象,每个元素对应一行,如果该行不是第一次出现,则元素为True
# 在mysql中有属性表,城市,省份 市 市的名字只能是唯一,唯一键的效率低
# 如果有属性数据重复,那么pandas拿到的就是一个带有数据冗余的表
df = DataFrame({'color': ['white', 'red', 'white'], 'size': [10, 20, 10]})
df
color | size | |
---|---|---|
0 | white | 10 |
1 | red | 20 |
2 | white | 10 |
#
df.duplicated()
输出
0 False
1 False
2 True
dtype: bool
df.drop_duplicates()
color | size | |
---|---|---|
0 | white | 10 |
1 | red | 20 |
df2 = pd.concat([df, df], axis=1)
df2
color | size | color | size | |
---|---|---|---|---|
0 | white | 10 | white | 10 |
1 | red | 20 | red | 20 |
2 | white | 10 | white | 10 |
df2.duplicated()
输出
0 False
1 False
2 True
dtype: bool
df2.drop_duplicates()
color | size | color | size | |
---|---|---|---|---|
0 | white | 10 | white | 10 |
1 | red | 20 | red | 20 |
# 如果真的出现了两列完全相同,那么我们该怎么删除列
# drop 会删除所有的
df2.T.drop_duplicates().T
color | size | |
---|---|---|
0 | white | 10 |
1 | red | 20 |
2 | white | 10 |
df
color | size | |
---|---|---|
0 | white | 10 |
1 | red | 20 |
2 | white | 10 |
df.mean()
输出
size 13.333333
dtype: float64
d = {'white': 255, 'red': 128}
df.replace(d)
color | size | |
---|---|---|
0 | 255 | 10 |
1 | 128 | 20 |
2 | 255 | 10 |
d = {'white': 255, 'gray': 128}
df.replace(d)
# map也可以用来检索文章中敏感词
color | size | |
---|---|---|
0 | 255 | 10 |
1 | red | 20 |
2 | 255 | 10 |
df
color | size | |
---|---|---|
0 | white | 10 |
1 | red | 20 |
2 | white | 10 |
# Put a missing value into the first row of 'size'.
# The earlier chained form df['size'][0] = np.nan indexes twice, so pandas may
# write into a temporary copy; that form produced the SettingWithCopyWarning
# text recorded right after this cell. A single .loc call writes to df itself.
# NaN cannot live in an integer column, so the column is cast to float first.
df['size'] = df['size'].astype(float)
df.loc[0, 'size'] = np.nan
C:\Users\Administrator\Anaconda3\lib\site-packages\ipykernel_launcher.py:1: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy “”“Entry point for launching an IPython kernel. C:\Users\Administrator\Anaconda3\lib\site-packages\pandas\core\indexing.py:194: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy self._setitem_with_indexer(indexer, value)
# When the table contains NaN and we want 0 instead, a replace() mapping works;
# fillna() is the dedicated method for the same job.
d = {np.nan: 0}
df.replace(d)
color | size | |
---|---|---|
0 | white | 0.0 |
1 | red | 20.0 |
2 | white | 10.0 |
df2 = DataFrame(np.random.randint(0, 150, size=(4, 2)),
columns=['Python', 'Java'],
index=list('abcd'))
df2
Python | Java | |
---|---|---|
a | 58 | 1 |
b | 99 | 71 |
c | 66 | 7 |
d | 104 | 143 |
#使用map函数新建一列
# 生成一个Math的列
df2['Math'] = df2['Python'].map(lambda x: x + 20)
df2
Python | Java | Math | |
---|---|---|---|
a | 58 | 1 | 78 |
b | 99 | 71 | 119 |
c | 66 | 7 | 86 |
d | 104 | 143 | 124 |
def level(x):
    """Map a numeric score to a grade label.

    Bands, checked from the top down:
        x > 100        -> '完美'
        80 <= x <= 100 -> '优秀'
        60 <= x < 80   -> '及格'
        otherwise      -> '不及格'

    The previous second branch tested ``x > 100 and x >= 80``, which can never
    be true after the first branch, so scores from 80 to 100 fell through to
    '不及格'.
    """
    if x > 100:
        return '完美'
    elif x >= 80:
        return '优秀'
    elif x >= 60:
        return '及格'
    else:
        return '不及格'
#用数学成绩判断这个人的等级,然后将判断得到的等级添加为一列
df2['level'] = df2['Math'].map(level)
df2
Python | Java | Math | level | |
---|---|---|---|---|
a | 58 | 1 | 78 | 不及格 |
b | 99 | 71 | 119 | 完美 |
c | 66 | 7 | 86 | 不及格 |
d | 104 | 143 | 124 | 完美 |
df2['Math'] = df2['Math'].transform(lambda x: x + np.random.randint(0, 50, size=1)[0])
df2
Python | Java | Math | level | |
---|---|---|---|---|
a | 58 | 1 | 121 | 不及格 |
b | 99 | 71 | 161 | 完美 |
c | 66 | 7 | 134 | 不及格 |
d | 104 | 143 | 158 | 完美 |
使用map()函数新建一个新列
df2.drop('level', axis=1, inplace=True)
df2
# 我们在pandas中map中最大区别一个使用dict一个不可以使用
df2['level'] = df2['Java'].transform(level)
df2
Python | Java | Math | level | |
---|---|---|---|---|
a | 58 | 1 | 121 | 不及格 |
b | 99 | 71 | 161 | 及格 |
c | 66 | 7 | 134 | 不及格 |
d | 104 | 143 | 158 | 完美 |
df2
Python | Java | Math | level | |
---|---|---|---|---|
a | 58 | 1 | 121 | 不及格 |
b | 99 | 71 | 161 | 及格 |
c | 66 | 7 | 134 | 不及格 |
d | 104 | 143 | 158 | 完美 |
# This does not work: a pandas Index is immutable, so item assignment on
# df2.index raises a TypeError. The line is kept only to show the failure.
df2.index[0] = '张三'
# rename() replaces the labels that exist on the chosen axis and ignores the
# rest: with axis=1 only the column 'Python' is renamed, 'a' (a row label) is
# left alone.
df2.rename({'a':'张三', 'Python': '大蟒蛇'},axis=1)
大蟒蛇 | Java | Math | level | |
---|---|---|---|---|
a | 58 | 1 | 121 | 不及格 |
b | 99 | 71 | 161 | 及格 |
c | 66 | 7 | 134 | 不及格 |
d | 104 | 143 | 158 | 完美 |
# rename中的参数值可以是一个func
def index_rename(item):
    """Return the replacement label for one index entry.

    The label 'a' becomes '张三'; every other label becomes '李四'.
    Meant to be passed to ``DataFrame.rename`` as the mapping function.
    """
    # The body had lost its indentation, which made the definition
    # unparsable; the logic itself is unchanged.
    if item == 'a':
        return '张三'
    return '李四'
df2.rename(index_rename)
Python | Java | Math | level | |
---|---|---|---|---|
张三 | 58 | 1 | 121 | 不及格 |
李四 | 99 | 71 | 161 | 及格 |
李四 | 66 | 7 | 134 | 不及格 |
李四 | 104 | 143 | 158 | 完美 |
# 数据中比较大或特别小的都可以认为是异常值
# 大头子和小头爸爸都算是人类中的异常值
# NaN也是异常值
df3 = DataFrame(np.random.randint(0, 150, size=(10, 4)),
columns=['Python', 'Java', 'PHP', 'VR'])
df3
Python | Java | PHP | VR | |
---|---|---|---|---|
0 | 122 | 19 | 146 | 88 |
1 | 82 | 105 | 120 | 146 |
2 | 134 | 19 | 81 | 127 |
3 | 0 | 78 | 75 | 39 |
4 | 3 | 115 | 19 | 8 |
5 | 16 | 147 | 71 | 44 |
6 | 16 | 92 | 124 | 131 |
7 | 42 | 141 | 64 | 114 |
8 | 31 | 103 | 64 | 13 |
9 | 103 | 83 | 128 | 108 |
df3.describe()
Python | Java | PHP | VR | |
---|---|---|---|---|
count | 10.000000 | 10.000000 | 10.000000 | 10.000000 |
mean | 54.900000 | 90.200000 | 89.200000 | 81.800000 |
std | 50.858081 | 43.649615 | 39.000855 | 51.410764 |
min | 0.000000 | 19.000000 | 19.000000 | 8.000000 |
25% | 16.000000 | 79.250000 | 65.750000 | 40.250000 |
50% | 36.500000 | 97.500000 | 78.000000 | 98.000000 |
75% | 97.750000 | 112.500000 | 123.000000 | 123.750000 |
max | 134.000000 | 147.000000 | 146.000000 | 146.000000 |
使用std()函数可以求得DataFrame对象每一列的标准差
df3.std()
输出
Python 50.858081
Java 43.649615
PHP 39.000855
VR 51.410764
dtype: float64
根据每一列的标准差,对DataFrame元素进行过滤。
借助any()函数, 测试是否有True,有一个或以上返回True,反之返回False
对每一列应用筛选条件,去除标准差太大的数据
df3
Python | Java | PHP | VR | |
---|---|---|---|---|
0 | 122 | 19 | 146 | 88 |
1 | 82 | 105 | 120 | 146 |
2 | 134 | 19 | 81 | 127 |
3 | 0 | 78 | 75 | 39 |
4 | 3 | 115 | 19 | 8 |
5 | 16 | 147 | 71 | 44 |
6 | 16 | 92 | 124 | 131 |
7 | 42 | 141 | 64 | 114 |
8 | 31 | 103 | 64 | 13 |
9 | 103 | 83 | 128 | 108 |
# Threshold demo with 70 as the cut-off.
# NOTE(review): the original note called values above 70 outliers, yet the mask
# below KEEPS exactly the rows in which every column is >= 70 - confirm which
# side is meant to be discarded.
cond1 = df3 >= 70
cond2 = cond1.all(axis=1)
df3[cond2]
Python | Java | PHP | VR | |
---|---|---|---|---|
1 | 82 | 105 | 120 | 146 |
9 | 103 | 83 | 128 | 108 |
删除特定索引df.drop(labels,inplace = True)
## 4. 排序 使用.take()函数排序 可以借助np.random.permutation()函数随机排序# 生成指定大小的矩阵
np.random.permutation([4, 2])
输出
array([4, 2])
df2
Python | Java | Math | level | |
---|---|---|---|---|
a | 58 | 1 | 121 | 不及格 |
b | 99 | 71 | 161 | 及格 |
c | 66 | 7 | 134 | 不及格 |
d | 104 | 143 | 158 | 完美 |
df2.iloc[::-1]
Python | Java | Math | level | |
---|---|---|---|---|
d | 104 | 143 | 158 | 完美 |
c | 66 | 7 | 134 | 不及格 |
b | 99 | 71 | 161 | 及格 |
a | 58 | 1 | 121 | 不及格 |
# 通过手写的方式将矩阵进行颠倒
df2.take([3, 2, 1, 0])
Python | Java | Math | level | |
---|---|---|---|---|
d | 104 | 143 | 158 | 完美 |
c | 66 | 7 | 134 | 不及格 |
b | 99 | 71 | 161 | 及格 |
a | 58 | 1 | 121 | 不及格 |
# 怎么进行一个随机的排序
df2.take(np.random.permutation(4), axis=1)
Python | level | Java | Math | |
---|---|---|---|---|
a | 58 | 不及格 | 1 | 121 |
b | 99 | 及格 | 71 | 161 |
c | 66 | 不及格 | 7 | 134 |
d | 104 | 完美 | 143 | 158 |
df4 = DataFrame(np.random.randint(0, 150, size=(1000, 4)),
columns=['Python', 'Java', 'Math', 'China'])
df4
Python | Java | Math | China | |
---|---|---|---|---|
0 | 0 | 94 | 45 | 52 |
1 | 130 | 44 | 120 | 60 |
2 | 45 | 134 | 149 | 14 |
3 | 18 | 62 | 4 | 55 |
4 | 73 | 66 | 112 | 27 |
5 | 17 | 90 | 61 | 136 |
6 | 106 | 101 | 29 | 100 |
7 | 60 | 58 | 142 | 60 |
8 | 47 | 94 | 70 | 24 |
9 | 9 | 115 | 101 | 80 |
10 | 92 | 2 | 135 | 35 |
11 | 149 | 57 | 39 | 92 |
12 | 131 | 143 | 149 | 142 |
13 | 35 | 68 | 12 | 119 |
14 | 137 | 62 | 47 | 68 |
15 | 67 | 74 | 140 | 34 |
16 | 85 | 139 | 88 | 104 |
17 | 32 | 122 | 31 | 145 |
18 | 65 | 24 | 33 | 17 |
19 | 68 | 41 | 60 | 99 |
20 | 76 | 104 | 29 | 78 |
21 | 61 | 9 | 26 | 58 |
22 | 65 | 23 | 12 | 22 |
23 | 27 | 90 | 122 | 93 |
24 | 55 | 119 | 49 | 27 |
25 | 92 | 95 | 116 | 127 |
26 | 127 | 96 | 39 | 60 |
27 | 116 | 15 | 103 | 130 |
28 | 86 | 53 | 11 | 128 |
29 | 120 | 91 | 138 | 124 |
… | … | … | … | … |
970 | 100 | 60 | 17 | 66 |
971 | 122 | 148 | 30 | 22 |
972 | 33 | 133 | 121 | 110 |
973 | 34 | 66 | 62 | 133 |
974 | 88 | 14 | 93 | 131 |
975 | 109 | 86 | 143 | 28 |
976 | 69 | 45 | 86 | 31 |
977 | 55 | 92 | 57 | 138 |
978 | 12 | 63 | 72 | 23 |
979 | 100 | 40 | 116 | 142 |
980 | 113 | 87 | 74 | 80 |
981 | 44 | 62 | 93 | 39 |
982 | 144 | 63 | 130 | 111 |
983 | 38 | 57 | 105 | 55 |
984 | 10 | 18 | 94 | 20 |
985 | 86 | 144 | 12 | 35 |
986 | 96 | 6 | 8 | 54 |
987 | 10 | 93 | 61 | 127 |
988 | 7 | 61 | 79 | 110 |
989 | 28 | 141 | 128 | 76 |
990 | 136 | 136 | 13 | 66 |
991 | 80 | 129 | 61 | 101 |
992 | 141 | 143 | 51 | 38 |
993 | 29 | 46 | 118 | 129 |
994 | 40 | 64 | 69 | 100 |
995 | 92 | 70 | 142 | 54 |
996 | 115 | 71 | 62 | 55 |
997 | 28 | 10 | 93 | 107 |
998 | 31 | 137 | 88 | 75 |
999 | 76 | 36 | 44 | 119 |
1000 rows × 4 columns
# Draw 50 of the 1000 rows at random.
# This could back a prize draw at a company party.
# np.random.randint samples with replacement, so the same row could be drawn
# more than once; taking the first 50 positions of a random permutation gives
# 50 distinct rows.
df4.take(np.random.permutation(len(df4))[:50])
Python | Java | Math | China | |
---|---|---|---|---|
110 | 11 | 145 | 121 | 64 |
799 | 9 | 4 | 42 | 21 |
333 | 36 | 3 | 134 | 145 |
998 | 31 | 137 | 88 | 75 |
143 | 6 | 146 | 87 | 23 |
602 | 115 | 109 | 59 | 53 |
903 | 132 | 32 | 147 | 81 |
74 | 102 | 57 | 146 | 84 |
500 | 124 | 116 | 84 | 17 |
218 | 4 | 99 | 91 | 126 |
490 | 74 | 56 | 39 | 134 |
771 | 1 | 75 | 57 | 6 |
632 | 39 | 10 | 145 | 106 |
615 | 75 | 113 | 82 | 37 |
369 | 46 | 17 | 139 | 106 |
227 | 50 | 144 | 134 | 67 |
518 | 27 | 99 | 42 | 15 |
866 | 68 | 30 | 79 | 43 |
626 | 47 | 15 | 27 | 111 |
748 | 91 | 111 | 70 | 89 |
135 | 84 | 46 | 131 | 110 |
926 | 1 | 56 | 129 | 148 |
905 | 31 | 136 | 25 | 32 |
387 | 136 | 24 | 103 | 131 |
837 | 145 | 50 | 10 | 18 |
804 | 60 | 75 | 70 | 139 |
727 | 6 | 57 | 98 | 14 |
907 | 142 | 63 | 88 | 25 |
538 | 145 | 41 | 89 | 18 |
81 | 51 | 11 | 33 | 69 |
646 | 130 | 7 | 43 | 15 |
131 | 148 | 117 | 103 | 17 |
846 | 141 | 84 | 99 | 48 |
923 | 111 | 148 | 40 | 78 |
96 | 108 | 128 | 41 | 108 |
275 | 4 | 41 | 93 | 41 |
612 | 15 | 7 | 16 | 81 |
757 | 103 | 79 | 48 | 70 |
581 | 13 | 10 | 73 | 37 |
709 | 53 | 40 | 117 | 121 |
641 | 146 | 17 | 127 | 23 |
724 | 22 | 68 | 16 | 32 |
671 | 134 | 34 | 140 | 80 |
445 | 108 | 45 | 134 | 55 |
883 | 137 | 114 | 112 | 90 |
300 | 139 | 104 | 121 | 9 |
451 | 66 | 134 | 138 | 127 |
711 | 39 | 92 | 147 | 74 |
466 | 14 | 99 | 34 | 70 |
322 | 108 | 86 | 122 | 62 |
df5 = DataFrame({'item': ['萝卜', '白菜', '西红柿', '辣椒', '冬瓜','萝卜', '西红柿', '白菜', '西红柿', '辣椒', '冬瓜'],
'seller': ['李大妈', '李大妈', '李大妈', '王大妈', '王大妈', '王大妈', '王大妈', '赵大妈', '赵大妈', '赵大妈', '赵大妈'],
'price': np.random.randint(3, 10, size=11)},
columns = ['item', 'seller', 'price'])
df5
item | seller | price | |
---|---|---|---|
0 | 萝卜 | 李大妈 | 9 |
1 | 白菜 | 李大妈 | 7 |
2 | 西红柿 | 李大妈 | 8 |
3 | 辣椒 | 王大妈 | 6 |
4 | 冬瓜 | 王大妈 | 8 |
5 | 萝卜 | 王大妈 | 5 |
6 | 西红柿 | 王大妈 | 4 |
7 | 白菜 | 赵大妈 | 8 |
8 | 西红柿 | 赵大妈 | 4 |
9 | 辣椒 | 赵大妈 | 6 |
10 | 冬瓜 | 赵大妈 | 6 |
# Group by vegetable, then pick the cheapest offer in each group.
# groupby().min() takes the minimum of every column independently, so it pairs
# the lowest price with the alphabetically smallest seller rather than with the
# seller who actually charges that price. idxmin() returns the row label of the
# lowest price per item, and .loc pulls those complete rows.
df5.loc[df5.groupby('item')['price'].idxmin()]
seller | price | |
---|---|---|
item | ||
冬瓜 | 王大妈 | 6 |
白菜 | 李大妈 | 7 |
萝卜 | 李大妈 | 5 |
西红柿 | 李大妈 | 4 |
辣椒 | 王大妈 | 6 |
# Most expensive offer per vegetable. idxmax() keeps the price together with
# the seller who charges it, whereas groupby().max() maximises each column
# independently and can pair a price with an unrelated seller.
df5.loc[df5.groupby('item')['price'].idxmax()]
seller | price | |
---|---|---|
item | ||
冬瓜 | 赵大妈 | 8 |
白菜 | 赵大妈 | 8 |
萝卜 | 王大妈 | 9 |
西红柿 | 赵大妈 | 8 |
辣椒 | 赵大妈 | 6 |
# Average price per vegetable, with a 'mean_' prefix on the resulting column.
# Only the numeric 'price' column is selected before aggregating: 'seller'
# holds strings, and averaging it raises a TypeError on current pandas releases
# instead of being dropped silently.
mean_price = df5.groupby(['item'])[['price']].mean().add_prefix('mean_')
mean_price
# select avg('item) as asd
mean_price | |
---|---|
item | |
冬瓜 | 7.000000 |
白菜 | 7.500000 |
萝卜 | 7.000000 |
西红柿 | 5.333333 |
辣椒 | 6.000000 |
# Attach the per-item mean to every row of the original table.
# left_on names the key column of the left frame and right_index=True matches
# it against the index of mean_price, so several rows on the left share one
# row on the right (a many-to-one join).
df6 = pd.merge(df5, mean_price, left_on='item', right_index=True)
df6
item | seller | price | mean_price | |
---|---|---|---|---|
0 | 萝卜 | 李大妈 | 9 | 7.000000 |
5 | 萝卜 | 王大妈 | 5 | 7.000000 |
1 | 白菜 | 李大妈 | 7 | 7.500000 |
7 | 白菜 | 赵大妈 | 8 | 7.500000 |
2 | 西红柿 | 李大妈 | 8 | 5.333333 |
6 | 西红柿 | 王大妈 | 4 | 5.333333 |
8 | 西红柿 | 赵大妈 | 4 | 5.333333 |
3 | 辣椒 | 王大妈 | 6 | 6.000000 |
9 | 辣椒 | 赵大妈 | 6 | 6.000000 |
4 | 冬瓜 | 王大妈 | 8 | 7.000000 |
10 | 冬瓜 | 赵大妈 | 6 | 7.000000 |
# Standard deviation of the price per vegetable, i.e. how much each
# vegetable's price fluctuates between sellers; columns get a 'std_' prefix.
# Only the numeric 'price' column is aggregated: the string column 'seller'
# has no standard deviation and makes std() fail on current pandas releases.
price_std = df5.groupby(['item'])[['price']].std().add_prefix('std_')
price_std
std_price | |
---|---|
item | |
冬瓜 | 1.414214 |
白菜 | 0.707107 |
萝卜 | 2.828427 |
西红柿 | 2.309401 |
辣椒 | 0.000000 |
# 将蔬菜的波动值融合到原数据中
df7 = pd.merge(df6, price_std, left_on='item', right_index=True)
df7
item | seller | price | mean_price | std_price | |
---|---|---|---|---|---|
0 | 萝卜 | 李大妈 | 9 | 7.000000 | 2.828427 |
5 | 萝卜 | 王大妈 | 5 | 7.000000 | 2.828427 |
1 | 白菜 | 李大妈 | 7 | 7.500000 | 0.707107 |
7 | 白菜 | 赵大妈 | 8 | 7.500000 | 0.707107 |
2 | 西红柿 | 李大妈 | 8 | 5.333333 | 2.309401 |
6 | 西红柿 | 王大妈 | 4 | 5.333333 | 2.309401 |
8 | 西红柿 | 赵大妈 | 4 | 5.333333 | 2.309401 |
3 | 辣椒 | 王大妈 | 6 | 6.000000 | 0.000000 |
9 | 辣椒 | 赵大妈 | 6 | 6.000000 | 0.000000 |
4 | 冬瓜 | 王大妈 | 8 | 7.000000 | 1.414214 |
10 | 冬瓜 | 赵大妈 | 6 | 7.000000 | 1.414214 |
# std() is the standard deviation; it describes how much the data fluctuates.
def std_price(std_p):
    """Turn a price standard deviation into a descriptive label.

    std_p > 2.5       -> '价格很坑'
    1 < std_p <= 2.5  -> '价格稳定'
    otherwise         -> '良心菜价'

    Any value for which both comparisons are false, NaN included, gets the
    last label.
    """
    # The body had lost its indentation, which made the definition
    # unparsable; the thresholds and labels are unchanged.
    if std_p > 2.5:
        return '价格很坑'
    elif std_p > 1:
        return '价格稳定'
    else:
        return '良心菜价'
# map
# 将各个蔬菜的价格情况添加为一列
df7['std_p'] = df7['std_price'].map(std_price)
df7
item | seller | price | mean_price | std_price | std_p | |
---|---|---|---|---|---|---|
0 | 萝卜 | 李大妈 | 9 | 7.000000 | 2.828427 | 价格很坑 |
5 | 萝卜 | 王大妈 | 5 | 7.000000 | 2.828427 | 价格很坑 |
1 | 白菜 | 李大妈 | 7 | 7.500000 | 0.707107 | 良心菜价 |
7 | 白菜 | 赵大妈 | 8 | 7.500000 | 0.707107 | 良心菜价 |
2 | 西红柿 | 李大妈 | 8 | 5.333333 | 2.309401 | 价格稳定 |
6 | 西红柿 | 王大妈 | 4 | 5.333333 | 2.309401 | 价格稳定 |
8 | 西红柿 | 赵大妈 | 4 | 5.333333 | 2.309401 | 价格稳定 |
3 | 辣椒 | 王大妈 | 6 | 6.000000 | 0.000000 | 良心菜价 |
9 | 辣椒 | 赵大妈 | 6 | 6.000000 | 0.000000 | 良心菜价 |
4 | 冬瓜 | 王大妈 | 8 | 7.000000 | 1.414214 | 价格稳定 |
10 | 冬瓜 | 赵大妈 | 6 | 7.000000 | 1.414214 | 价格稳定 |
df7
item | seller | price | mean_price | std_price | std_p | |
---|---|---|---|---|---|---|
0 | 萝卜 | 李大妈 | 9 | 7.000000 | 2.828427 | 价格很坑 |
5 | 萝卜 | 王大妈 | 5 | 7.000000 | 2.828427 | 价格很坑 |
1 | 白菜 | 李大妈 | 7 | 7.500000 | 0.707107 | 良心菜价 |
7 | 白菜 | 赵大妈 | 8 | 7.500000 | 0.707107 | 良心菜价 |
2 | 西红柿 | 李大妈 | 8 | 5.333333 | 2.309401 | 价格稳定 |
6 | 西红柿 | 王大妈 | 4 | 5.333333 | 2.309401 | 价格稳定 |
8 | 西红柿 | 赵大妈 | 4 | 5.333333 | 2.309401 | 价格稳定 |
3 | 辣椒 | 王大妈 | 6 | 6.000000 | 0.000000 | 良心菜价 |
9 | 辣椒 | 赵大妈 | 6 | 6.000000 | 0.000000 | 良心菜价 |
4 | 冬瓜 | 王大妈 | 8 | 7.000000 | 1.414214 | 价格稳定 |
10 | 冬瓜 | 赵大妈 | 6 | 7.000000 | 1.414214 | 价格稳定 |
# 将各个蔬菜的价格求和
df7.groupby(['item'])['price'].sum()
输出
item
冬瓜 14
白菜 15
萝卜 14
西红柿 16
辣椒 12
Name: price, dtype: int32
# Broadcast each vegetable's total price back onto its own rows.
# The aggregation is named by string: pandas maps the Python builtin sum onto
# its own groupby sum anyway, and newer releases warn about that alias, so
# 'sum' states the intent directly.
df7.groupby(['item'])['price'].transform('sum')
输出
0 14
5 14
1 15
7 15
2 16
6 16
8 16
3 12
9 12
4 14
10 14
Name: price, dtype: int32
df = DataFrame({'color':['white','black','white','white','black','black'],
'status':['up','up','down','down','down','up'],
'value1':[12.33,14.55,22.34,27.84,23.40,18.33],
'value2':[11.23,31.80,29.99,31.18,18.25,22.44]})
apply的操作对象,也就是传给lambda的参数是整列的数组
# np.mean()
# Main difference between apply and transform as used here: transform returns
# a result aligned with the original rows (same index and length as the
# input), while apply with a reducing function returns one value per group.
df7.groupby(['item'])['price'].apply(sum)
输出
item
冬瓜 14
白菜 15
萝卜 14
西红柿 16
辣椒 12
Name: price, dtype: int64
df7.groupby(['item'])['price'].apply(np.mean)
- 输出 item 冬瓜 7.000000 白菜 7.500000 萝卜 7.000000 西红柿 5.333333 辣椒 6.000000 Name: price, dtype: float64 Series和DataFrame都有一个用于生成各类图表的plot方法,默认情况下所生成的都是线形图
import numpy as np
import pandas as pd
from pandas import Series, DataFrame
# matplotlib不引入,在调用plot()的时候不会报错,但是图片也显示不出
# pandas和matplotlib进行了深度合作
import matplotlib.pyplot as plt
# 线形图 简单示例Series图例表示.plot()
s = Series(np.random.randint(0 ,20, size=10))
s
输出
0 14
1 6
2 19
3 5
4 7
5 16
6 1
7 10
8 11
9 17
dtype: int32
s.plot()
简单的DataFrame图表示例.plot()
df = DataFrame(np.random.randint(0, 150, size=(5, 4)),columns = ['Python', 'Java', 'PHP', 'Ruby'])
df.plot()
### 柱状图 DataFrame柱状图例
df.plot(kind='bar')
df.plot(kind='barh')
读取tips.csv,查看每天聚会人数,以及每天各种聚会规模的比例
求和用df.sum(),注意灵活使用axis参数
tip = pd.read_csv('./tips.csv')
tip
day | 1 | 2 | 3 | 4 | 5 | 6 | |
---|---|---|---|---|---|---|---|
0 | Fri | 1 | 16 | 1 | 1 | 0 | 0 |
1 | Stat | 2 | 53 | 18 | 13 | 1 | 0 |
2 | Sun | 0 | 39 | 15 | 18 | 3 | 1 |
3 | Thur | 1 | 48 | 4 | 5 | 1 | 3 |
将第一列day数据变成行索引set_index
tip.set_index('day',inplace=True)
tip
# Rows: the day of the week.
# Columns: how many people went to eat together (party size).
1 | 2 | 3 | 4 | 5 | 6 | |
---|---|---|---|---|---|---|
day | ||||||
Fri | 1 | 16 | 1 | 1 | 0 | 0 |
Stat | 2 | 53 | 18 | 13 | 1 | 0 |
Sun | 0 | 39 | 15 | 18 | 3 | 1 |
Thur | 1 | 48 | 4 | 5 | 1 | 3 |
tip.plot(kind='bar')
# Remove the unimportant columns (party sizes 1 and 6).
tip.drop(axis=1,labels=['1', '6'], inplace=True)
# Another way to keep only sizes 2-5: slice the columns by position.
# NOTE(review): the drop above is in place, so only four columns are left here
# and iloc[:, 1:-1] trims two more of them; the slice looks intended for the
# frame before the drop - confirm.
tip.iloc[:,1:-1].plot(kind='bar')
# Exercise: try this yourself after class.
tip.plot(kind='bar')
## 直方图 random生成随机直方图,调用hist()方法
nd = np.random.randint(0, 10, size=10)
nd
输出
array([2, 3, 5, 4, 7, 0, 4, 4, 0, 5])
s = Series(nd)
# bins 默认值是10 值越大条越细
s.hist(bins=50)
随机百分比密度图
# density 密度
s.plot(kind='kde')
这两张图经常会被画在一起:直方图以规格化形式给出(以便画出密度图),然后再在其上绘制核密度估计。
接下来看看一个由两个不同的标准正态分布组成的双峰分布。
np.random.normal()正态分布函数
直方图hist,函数中必须添加属性normed = True(新版matplotlib中该参数已改名为density = True)
nd1 = np.random.normal(loc=15, scale=5, size=1000)
nd1
输出
array([16.50002167, 21.95871306, 22.75958416, 17.31162324, 21.11362045,
18.26581647, 14.3677174 , 14.04193695, 24.47250718, 12.15495935,
9.27291491, 12.31342298, 10.2092411 , 26.50106733, 15.73813638,
15.96716855, 16.69108033, 17.47009465, 23.25585844, 3.75013434,
25.12165759, 9.2131128 , 20.80536815, 15.5412481 , 28.90305594,
20.53246725, 11.80165575, 21.60802209, 11.17585822, 16.32740781,
15.18755747, 18.45527818, 20.69591603, 21.49051133, 17.71510416,
18.80558295, 24.17867724, 20.85117481, 20.24591299, 10.10623473,
14.24666052, 17.52448709, 19.22814077, 15.72357144, 20.43921318,
8.0917059 , 22.00146547, 23.50631652, 9.19318319, 9.66151146,
15.75132368, 15.32207848, 15.48579172, 25.26377042, 9.0799947 ,
7.36254758, 2.42731825, 18.10300724, 10.88015804, 13.39732585,
8.91052017, 24.09794363, 18.02868005, 9.901521 , 13.21712227,
19.5068725 , 10.50911924, 14.89803695, 15.50261478, 17.15580065,
14.98757024, 26.86473339, 12.68019989, 15.02038962, 17.42206335,
11.8994741 , 14.31309029, 13.75152275, 6.47324371, 14.73115004,
21.72693826, 20.54387112, 21.36295281, 25.51458225, 7.25840778,
13.76897525, 19.77348404, 15.09407833, 18.76318749, 5.43239314,
15.45835862, 15.40969353, 16.28667652, 13.07865591, 8.28808502,
13.42694347, 18.38122404, 17.6814759 , 19.28344426, 9.53901169,
14.46634917, 11.84352671, 11.09474199, 4.75771378, 15.53622797,
13.13240292, 11.15249509, 8.30862199, 17.21214064, 17.02251594,
16.28650651, 8.21461943, 14.76498248, 15.98369367, 11.98718219,
17.02526983, 19.97020523, 16.44969423, 13.71055375, 22.77569864,
8.51357972, 6.61518919, 14.84898364, 17.58370265, 17.49575552,
12.93721125, 14.53553582, 18.22937262, 15.59990324, 10.94411136,
15.15079863, 7.75436718, 19.64155892, 8.05933151, 14.14798446,
25.93909358, 9.48265882, 16.06401139, 17.17927089, 16.88383318,
19.95283715, 12.02420859, 18.16494132, 14.31856533, 17.48454134,
19.70450889, 16.15431485, 14.97920007, 12.75705767, 21.13982655,
23.76846867, 20.43796706, 17.5593216 , 13.81091458, 19.4130765 ,
22.30718067, 18.30679605, 18.73435888, 21.1085248 , 16.17991617,
17.66907561, 9.3648496 , 12.53151174, 18.74196939, 14.72622698,
21.91666941, 10.68175022, 28.06536912, 16.92750333, 10.48199841,
16.95819675, 11.6035696 , 17.37937365, 12.65895274, 11.54480806,
18.3616206 , 18.02565635, 15.90571496, 6.96905143, 23.91371845,
18.72841836, 2.36636948, 15.66448126, 19.13919071, 14.37065233,
14.82159192, 13.72377931, 16.78926541, 20.13959017, 16.77379021,
14.84332182, 17.34148733, 18.78528991, 7.46478532, 21.50181241,
7.9833104 , 17.80479141, 16.70124722, 13.64582571, 13.577898 ,
16.37998434, 19.03837668, 16.89253034, 20.42677538, 11.88847021,
11.38631853, 15.50082332, 16.7030249 , 16.088621 , 12.51561343,
15.73055771, 19.48992889, 13.06485701, 8.0678333 , 18.05531125,
5.43515846, 19.04760736, 7.879961 , 20.12691794, 8.43035117,
17.22251733, 8.03678616, 16.0078554 , 16.15014473, 8.21224197,
19.38089222, 9.01029769, 11.26062042, 14.76521656, 17.35078069,
22.80520652, 14.46913352, 17.42879149, 13.37647894, 17.25338735,
18.73637238, 2.88421749, 20.40011706, 10.39826822, 15.05082296,
19.06963941, 16.91706731, 13.82272106, 7.27754931, 16.11283426,
15.5246287 , 13.81157399, 6.09658837, 15.6001777 , 18.92162163,
13.38838482, 12.3894269 , 25.61593369, 14.51006631, 24.07157909,
16.8478554 , 19.06337918, 6.89606982, 15.89648191, 9.94890574,
4.1741199 , 19.96662183, 22.11552452, 20.07915682, 11.81127282,
10.01155602, 16.06106413, 13.72389102, 13.58895892, 6.38356799,
15.51871295, 18.04894363, 6.91148171, 15.76145194, 11.06726095,
15.0972117 , 13.89722476, 17.2514969 , 13.71947839, 15.46657072,
9.57084451, 8.69629316, 18.11964659, 13.78259735, 25.01368229,
9.83800798, 9.04644566, 9.01788547, 13.94342442, 19.78553559,
14.88336783, 7.98179993, 12.39448036, 17.1408828 , 19.33147132,
14.05125452, 17.23008172, 13.65371068, 16.17567752, 12.23122378,
12.10680811, 2.95619838, 6.89529116, 15.51517282, 15.19482832,
12.01639161, 18.7831425 , 17.0239132 , 1.38190082, 13.69724912,
13.06217886, 26.94699552, 22.59459457, 11.97867369, 13.93633583,
17.10552488, 22.58992482, 19.40934193, 19.53641763, 15.88442231,
7.9295681 , 21.77780904, 12.47072318, 13.81039032, 17.39736673,
13.15453916, 21.36911849, 10.70201259, 10.59387373, 12.65426619,
11.54376933, 13.14155316, 16.87773098, 11.76657509, 14.89503279,
11.12283823, 21.28503492, 19.51000396, 16.66250962, 12.52950223,
20.49618371, 16.8304768 , 13.58145978, 5.67632775, 12.51501766,
16.40642439, 17.349106 , 7.89876048, 23.19479679, 10.2073971 ,
14.0566871 , 14.78658926, 20.44095559, 15.54838402, 14.89340323,
12.12392313, 8.92467267, 12.74334742, 17.47857181, 19.19866611,
2.45721744, 8.89357759, 8.14078952, 20.12140264, 13.08791183,
15.15808184, 11.52595765, 9.67495873, 8.53996197, 14.04442005,
22.79638213, 13.1821865 , 12.72650507, 9.90846988, 11.41972578,
17.93019223, 17.29881884, 12.55815741, 14.25266947, 10.33093622,
7.142058 , 21.69019735, 17.25592536, 13.08063266, 8.6764044 ,
10.24371983, 15.41889379, 4.28811294, 11.39411962, 26.56646754,
16.96888119, 18.83606664, 15.01419964, 15.34971026, 22.02537937,
4.36119692, 25.28447139, 10.48726479, 13.59826026, 14.06848502,
12.99063847, 16.78391206, 9.74175223, 19.30650068, 22.54410113,
24.98914811, 9.94762802, 21.40109447, 13.6695364 , 7.72764777,
9.64404477, 11.17475083, 12.60055258, 20.97721358, 23.34476622,
18.61854549, 15.52419453, 18.91436655, 9.62666948, 10.61228808,
18.06891452, 19.51675254, 13.23077434, 19.00453996, 13.32899195,
12.26702829, 15.6820842 , 8.11843937, 21.44353167, 15.52251699,
11.4997531 , 24.72088535, 16.28930122, 8.0863221 , 14.2955109 ,
7.17201489, 13.87547583, 17.67027528, 19.13974024, 22.68693614,
24.18678422, 6.06103468, 24.76111537, 10.97781017, 15.81552381,
10.84972314, 21.14140714, 13.76751962, 6.04277574, 24.58010336,
8.68660216, 16.87910212, 26.79462555, 16.53919855, 12.56473841,
12.48541333, 14.29182684, 10.48183018, 17.41734179, 10.97468455,
11.04737238, 11.98624708, 6.25940958, 14.55275986, 10.96969175,
14.44198921, 12.22024554, 21.06729722, 18.63578895, 15.01203526,
9.78234677, 12.69823694, 18.53826221, 18.33718908, 8.67841207,
12.40316973, 6.25257103, 13.12125998, 23.65467315, 8.48692567,
16.48741407, 22.88008283, 9.74399847, 24.23586658, 20.85856245,
6.93278193, 9.39064606, 11.74417199, 17.26023393, 17.4899952 ,
15.60605781, 17.23917663, 16.29546289, 10.05711149, 7.27520158,
13.98169832, 14.95431197, 15.24916955, 14.78272085, 21.01423717,
11.47976526, 16.05894889, 16.30783588, 11.30069306, 21.44509951,
21.12234178, 13.63871927, 8.67404016, 19.55244501, 10.47571836,
21.03142778, 19.22024405, 17.56288251, 11.91033324, 11.17856874,
11.01553264, 14.59588468, 8.70018864, 12.13660946, 24.77501267,
10.5752547 , 8.6157514 , 6.26227458, 14.93877643, 21.75858592,
17.27632436, 13.10800654, 21.44729017, 11.76678708, 21.22406953,
16.94976341, 5.29558133, 21.19612368, 9.19780541, 18.17468954,
16.2252154 , 19.59557946, 11.66578002, 19.06531132, 21.36638746,
20.6541794 , 15.53407109, 14.31871485, 8.88234657, 19.95083641,
15.33807678, 23.35312598, -0.41594355, 12.99684309, 17.81847784,
8.65620742, 11.11906853, 14.72213943, 10.96787938, 19.0607508 ,
15.06548159, 10.73352375, 7.80835499, 18.45434514, 10.94886255,
17.02581022, 15.74533205, 16.83597387, 15.6272432 , 15.07400084,
21.43340505, 19.8596281 , 15.10743665, 9.8524759 , 18.4157017 ,
8.71576747, 8.81978059, 18.74270385, 20.19611499, 7.78305738,
11.12512739, 11.0320895 , 4.08279734, 19.50883865, 18.3420021 ,
16.38436628, 15.41935401, 14.88358696, 21.28094706, 8.07498229,
5.44891281, 22.96052191, 4.82706329, 18.64935207, 6.81372616,
21.48792701, 10.81966699, 16.51777675, 13.66587892, 5.15167454,
19.6746364 , 18.92921522, 11.80793426, 15.99329862, 13.5428769 ,
17.09127653, 18.0650194 , 16.9966228 , 15.88075782, 10.01499427,
15.53643766, 14.82116696, 19.64805793, 13.38260529, 17.55275748,
12.79123029, 1.60388552, 9.64273317, 14.10328556, 12.80516354,
16.13765219, 24.82846188, 12.40167473, 14.37984687, 7.17152562,
3.57487493, 18.68028442, 22.02231231, 13.08430578, 15.36424363,
12.58209423, 19.16279834, 15.5310864 , 6.74056615, 18.95227059,
9.62725175, 24.77371694, 18.72404572, 22.33711919, 13.33348445,
10.82508722, 9.09203216, 2.25354049, 11.87985931, 9.93989723,
11.64604036, 13.04827512, 20.30953976, 12.18749003, 13.07841278,
16.33515174, 18.03769259, 16.79215097, 25.2966099 , 25.94685352,
5.69546044, 11.92172349, 19.8342691 , 12.6440724 , 20.28437939,
16.35062192, 14.67498508, 18.03873384, 11.98190638, 12.48304321,
17.18353178, 11.6280484 , 14.99820737, 18.62490662, 21.36806158,
10.33361186, 20.65887684, 18.14897646, 13.90046949, 11.97019342,
12.86450733, 14.79502629, 7.12492396, 14.02432214, 7.74633751,
20.42791153, 4.51691996, 12.45499258, 26.46562207, 25.43928357,
16.02156583, 15.77301938, 11.19124392, 20.2017068 , 11.38777201,
15.18199219, 13.06922751, 10.84513727, 16.72115341, 18.10468922,
3.27337482, 15.77864477, 11.69318446, 16.11939922, 5.75250131,
11.70259621, 9.96164663, 14.04692601, 16.66432923, 13.02730275,
22.14321073, 9.97324488, 20.89977249, 16.61452685, 14.98716636,
24.34831533, 12.64388541, 24.29123835, 18.62064405, 14.92635478,
20.6628145 , 12.35438856, 16.62651241, 15.23094509, 15.17337696,
6.55103708, 19.01998175, 23.25536204, 17.41528444, 21.16583531,
18.9232849 , 19.15163022, 7.24348554, 10.57811529, 19.75374939,
18.20943943, 22.28176618, 18.12091144, 12.23462022, 25.14515739,
13.50928872, 8.66583537, 15.73187863, 15.96399654, 12.59117191,
23.55583777, 17.89899462, 18.94571775, 11.52067712, 13.39007944,
12.59774377, 11.15893922, 10.95135673, 14.42674691, 17.96761055,
16.49122727, 12.35296522, 15.12034443, 15.74986747, 12.51710365,
9.03581021, 13.25291267, 10.15981987, 25.01072793, 18.97541121,
19.75442295, 15.72658347, 15.0584492 , 17.99054063, 5.09522099,
12.27876186, 16.19625502, 11.07278001, 20.34912201, 13.00536634,
16.99162583, 13.84421155, 14.38223744, 14.4279466 , 20.38561836,
11.8408168 , 13.61657858, 20.71889558, 13.34126163, 14.15019112,
25.77991025, 22.55302383, 19.96159611, 13.83428248, 10.01061803,
12.17573564, 12.90200549, 13.5309241 , 19.83578067, 9.19966323,
15.90839431, 10.42371807, 13.24996161, 11.84023659, 2.7949252 ,
18.58428929, 14.08771667, 11.87739049, 12.94936314, 5.72137785,
9.00511033, 24.56904689, 12.63663553, 17.76632913, 23.15421102,
17.48732613, 15.18508386, 16.55137541, 21.34679622, 16.29028048,
11.45359196, 20.79345061, 2.00181134, 15.79546808, 20.83970512,
22.25406941, 10.11771432, 8.67460536, 22.40106521, 8.87171 ,
15.65385265, 11.95693078, 17.83327425, 9.60459978, 8.27386886,
13.12833899, 11.16273179, 19.53131749, 23.54517063, 24.42752656,
23.96310917, 18.4784533 , 7.23297108, 10.54061606, 20.86058868,
11.84239435, 18.31885798, 11.17861973, 10.97931156, 16.58560191,
9.07837253, 15.33024139, 23.10263136, 14.38242525, 20.74808168,
14.23372606, 15.70755858, 21.23669843, 1.35217317, 12.37976423,
7.14600676, 18.39477579, 17.74507314, 18.47339341, 15.85955812,
12.03810285, 19.7152867 , 15.52989343, 9.58677829, 12.8273546 ,
13.12251889, 11.9852327 , 9.64979431, 4.67254736, 5.83485027,
25.58650984, 22.28051372, 10.58979922, 18.30552752, 13.38469023,
15.49887756, 18.9906719 , 15.28172592, 13.98672976, 22.84162786,
11.16412929, 28.13020788, 14.53176387, 10.58195681, 13.62227215,
19.39882903, 16.4404991 , 16.22472679, 21.69188583, 9.39591894,
13.51732061, 12.43309456, 24.88460179, 18.21701532, 12.22188839,
17.66401711, 18.73947878, 19.96433349, 12.44923092, 16.2503667 ,
13.93141598, 20.68562248, 13.22824968, 14.49476981, 14.65557226,
13.50932288, 18.78651139, 13.11609163, 13.72922394, 5.48391225,
21.74546083, 4.54749793, 9.61640779, 12.76025987, 16.6814269 ,
7.54157478, 14.12360955, 20.48599741, 7.25144564, 18.90874243,
18.4448861 , 9.92069343, 16.25574177, 6.89835745, 13.93865526,
21.31573413, 13.36223344, 15.66061316, 19.88795267, 17.3822145 ,
18.74051049, 18.76796461, 4.8938116 , 15.55699742, 13.40461 ,
15.95881726, 18.13730597, 24.84545955, -0.18746347, 15.51857936,
14.85838324, 9.53991868, 8.24792235, 14.38302382, 16.59971307,
19.10383007, 16.70314952, 14.83957597, 19.67174747, 21.01421162,
16.47244826, 11.66594006, 17.90942654, 12.45942559, 11.75413215,
15.8393784 , 16.96650233, 21.34297276, 16.89814547, 17.18400988,
23.06562578, 9.31042315, 17.07230418, 14.9523615 , 15.95823228,
21.05921299, 16.08593307, 19.09498751, 10.39794857, 25.96392485,
14.69617158, 17.66141256, 10.20026414, 6.2382827 , 16.86058806,
19.25205008, 13.70010653, 2.27012525, 16.92507514, 24.39321811,
10.85770341, 23.02441749, 19.2417343 , 19.05590996, 22.87654077,
20.41296773, 23.48198919, 16.00712785, 7.06656844, 15.13328042,
12.70216947, 7.8267746 , 23.43791089, 28.6641215 , 14.95792843,
10.89927121, 8.22974538, 6.62927352, 10.62186059, 12.56804649,
13.18951132, 14.61382172, 23.37917646, 20.56671153, 16.86438089,
18.11011443, 20.30088321, 17.27765895, 7.96486177, 9.15479929])
nd2 = np.random.normal(loc=5, scale=1, size=1000)
nd2
输出
array([4.16357325, 5.62772375, 3.9552129 , 5.01722184, 5.94728292,
3.87611474, 6.40455754, 7.46665515, 3.38515994, 5.34430971,
3.92116982, 5.26760858, 3.87753152, 4.91594112, 2.28170611,
5.22466842, 4.24448323, 5.5262917 , 5.51089455, 6.09550044,
6.8681638 , 4.88041697, 4.89141158, 4.26124332, 5.4022748 ,
4.61356982, 4.62460457, 7.67978794, 4.57731853, 6.67464937,
5.10280295, 4.33677994, 4.19373832, 5.36394475, 4.27075858,
5.39031978, 4.30670864, 3.93891792, 6.19342666, 4.07486768,
6.27780597, 4.55468073, 6.73625441, 5.86779949, 5.49083393,
4.58853995, 5.6148867 , 3.48363662, 6.63894092, 5.22175345,
4.39575909, 4.6296257 , 5.13293428, 4.63102278, 5.83887074,
5.23785433, 5.8753402 , 3.73502303, 5.15459362, 4.57097034,
5.67559993, 4.69832925, 5.65150331, 4.33270153, 3.92274046,
5.70642851, 5.60828203, 4.59928909, 4.16696389, 5.27554203,
6.56415018, 5.11148937, 5.33306229, 4.79096096, 7.14275288,
3.24127949, 5.42305971, 4.8503601 , 4.73999867, 3.30017711,
5.48859061, 8.6720081 , 4.61849386, 5.82255506, 4.94013434,
4.29599778, 3.64307498, 5.91702747, 4.53273311, 2.86494732,
4.83081915, 4.66762299, 5.51913047, 4.86923194, 5.64247402,
6.92941377, 5.80016722, 4.71514671, 4.36175335, 5.29760305,
4.03054667, 5.77336301, 4.27461778, 5.39664654, 2.95356186,
5.69516542, 5.65799556, 5.59469922, 6.1820571 , 3.33644612,
5.01553752, 5.61859069, 5.76889055, 5.33857628, 4.44932627,
5.44414466, 6.69054537, 4.95445522, 3.93424904, 6.14749563,
5.1360424 , 4.9205245 , 6.27270956, 3.402828 , 6.63289999,
4.53090779, 4.06289069, 5.68322406, 4.82075541, 4.18470219,
4.23731251, 5.784845 , 4.69988419, 5.60999033, 3.89867943,
4.91213246, 6.32055125, 3.32742804, 4.387534 , 4.22955403,
5.10986226, 5.31619754, 5.26842713, 4.30329285, 4.02172716,
5.12122162, 5.74783566, 4.89342586, 4.14319473, 4.89207417,
5.17976012, 4.92222856, 4.41448012, 6.11759185, 6.20346597,
6.30937222, 4.45736507, 4.19464601, 5.95110871, 5.39087317,
4.56342672, 5.41105951, 5.19021551, 4.44773853, 6.43024933,
4.2240709 , 3.10760838, 4.10843011, 3.49803467, 4.13008625,
4.93954846, 5.36686046, 4.94802037, 5.92916376, 3.6722152 ,
5.59956637, 2.26681606, 4.98058069, 5.52226543, 4.3678841 ,
5.79127271, 7.01911131, 5.54222329, 5.1736914 , 5.12307703,
3.64086435, 4.49529974, 3.09617049, 5.23362009, 4.83003444,
5.97855068, 4.90465821, 3.3488062 , 5.68806677, 3.10608318,
3.59533022, 4.1582079 , 5.42337516, 5.18032938, 3.80619133,
6.16036382, 7.10779899, 5.79528339, 4.20739267, 5.7849504 ,
6.35741234, 4.87889289, 4.67202464, 5.59772812, 3.89601316,
3.81930584, 3.86334162, 2.12820156, 6.03205986, 3.93786607,
4.69154114, 5.01000487, 5.48795451, 5.64960885, 3.28107906,
6.44071533, 5.84515172, 3.77795634, 4.1543535 , 4.76779398,
6.46011012, 4.20959715, 6.12791645, 3.13111379, 6.32404882,
4.01307035, 7.04022176, 5.88297238, 4.77726627, 5.81734169,
4.5834276 , 4.32792513, 4.78572659, 6.2394519 , 4.95323565,
3.76792869, 5.04695662, 4.6782468 , 4.26210285, 2.68574935,
4.87784877, 4.16379962, 4.89492512, 5.29077423, 6.63540688,
5.91601612, 5.79836639, 4.46454219, 5.59057577, 5.64159953,
3.73283891, 5.76728448, 5.59155539, 4.62458597, 4.59258478,
4.26568473, 4.13624814, 5.45664658, 3.20651961, 4.408289 ,
5.91191617, 6.42389566, 5.3310918 , 5.16260576, 5.08216533,
4.23972288, 5.4333913 , 5.52618567, 3.53352888, 6.18801528,
5.09545601, 5.47251721, 4.52145652, 4.48497876, 6.13442928,
5.46427115, 2.29920696, 5.96166798, 6.86041016, 4.64935911,
4.05211169, 5.22005598, 5.03828418, 3.11720228, 6.5067688 ,
4.16690752, 4.31972409, 7.66002522, 6.29075211, 6.54033656,
4.73315998, 4.48450546, 6.9592562 , 3.47862276, 5.30881486,
4.82692032, 5.57301637, 4.19444764, 5.20859468, 4.50189408,
4.34951492, 4.63316205, 5.55343187, 3.70853791, 6.23107709,
4.27639353, 3.43623509, 3.70621964, 4.20023314, 4.38700297,
6.0247284 , 4.10301279, 1.73386839, 3.8811813 , 5.95340252,
3.32468732, 5.33303337, 5.91283534, 6.00924919, 3.6032262 ,
4.59126025, 4.29693335, 6.78247321, 7.06582257, 3.79111525,
3.78370384, 5.66027207, 4.96055824, 5.70154465, 4.65007777,
4.5035654 , 5.61986658, 4.58564207, 6.36564777, 5.77875425,
4.70467287, 5.17829573, 5.82797022, 3.48804111, 5.03259832,
3.43797394, 5.4791678 , 4.29187316, 5.43345946, 2.97895257,
4.17941581, 6.46108478, 4.16631229, 3.82299511, 4.88063746,
5.99597437, 4.2566243 , 5.61937518, 4.45414788, 5.92053089,
3.49520851, 7.42316601, 5.29250923, 4.28792588, 7.36984588,
4.04040733, 4.26886281, 3.77000458, 4.56072738, 4.63605669,
4.95127483, 3.53601143, 3.72015063, 4.80868551, 3.67868907,
4.44574129, 5.95172132, 5.8080427 , 5.30838712, 5.24493577,
5.19000903, 4.74896707, 5.44316688, 3.96915616, 4.68391686,
7.5726147 , 4.91730717, 5.45240095, 5.9629974 , 2.47171197,
3.69924953, 4.68509439, 4.43821199, 4.99523136, 4.40460386,
4.96303014, 4.71679413, 4.57039619, 5.33357121, 5.15079823,
6.49139364, 3.86845536, 4.98871996, 3.7839628 , 4.273599 ,
5.95315578, 3.74863635, 4.11147589, 5.53989483, 4.80278295,
4.90408198, 6.95778251, 5.19522392, 5.5683528 , 5.98498926,
5.37911824, 6.28442773, 6.06750223, 6.05986868, 4.71704863,
3.99302845, 4.95290817, 2.49207559, 4.87651285, 5.15237529,
3.49276428, 4.99630332, 4.53049613, 4.93972966, 5.14459539,
6.43671059, 3.6443922 , 3.62322561, 4.93096589, 4.03285254,
5.12044196, 4.7470407 , 5.05114539, 4.98548726, 3.84715856,
4.38895987, 5.02571047, 4.27669533, 5.80634551, 3.74243646,
5.74672054, 4.30977187, 5.07860367, 5.36591131, 6.49938734,
4.5655998 , 6.72658129, 5.24367282, 4.48870661, 4.71499478,
6.42220582, 6.1114622 , 5.48564748, 5.57308475, 6.15891602,
5.9758781 , 5.85915079, 3.91862602, 2.12821654, 3.87280873,
3.66024374, 4.50284895, 5.21899411, 5.38545771, 7.16073212,
6.59658302, 5.00053088, 6.18178777, 3.63806579, 5.01496147,
5.90991082, 6.97094723, 4.83097336, 5.30165196, 4.661253 ,
5.86529545, 5.02720143, 4.84598703, 4.33055532, 3.21912353,
4.45419335, 4.77518495, 5.14216553, 6.36418059, 6.26262535,
4.59296046, 5.03454922, 6.00619901, 4.18621875, 4.91093635,
3.29685616, 3.79522521, 6.9291816 , 6.4750611 , 4.89884067,
5.78038694, 4.02603052, 5.70340682, 3.66391833, 6.78237457,
5.45288145, 4.96589685, 4.6344795 , 3.73365996, 4.93605937,
6.30098803, 6.15180329, 3.62245617, 4.31080067, 4.89444726,
4.46041046, 6.46807424, 6.69713398, 5.37660044, 3.82659646,
5.74103172, 4.49821583, 5.42852421, 2.79327574, 4.5965607 ,
4.89687084, 5.06394347, 3.76460552, 5.573225 , 3.72458122,
3.05024013, 5.17225729, 4.97343936, 4.62345663, 6.6316699 ,
3.89960346, 4.6849749 , 5.38701991, 5.70404233, 4.69017532,
5.876532 , 4.74405497, 5.44952709, 4.89016766, 6.21227894,
5.01569238, 6.63113838, 6.08459047, 4.42572288, 4.23403788,
4.58841822, 5.13660595, 3.78069157, 4.95872333, 5.94666079,
6.99971432, 5.57830349, 5.19189896, 3.35896715, 5.36527688,
5.72545598, 3.56843588, 4.57611497, 5.73088564, 6.5592119 ,
5.78983926, 4.76242211, 5.51854665, 5.37074764, 4.80702089,
5.68180577, 6.68828184, 3.97815013, 3.86441813, 5.50323984,
6.2954246 , 6.81212302, 4.34765753, 5.38604839, 3.57545063,
4.2153493 , 6.40788719, 5.04704768, 5.71393966, 5.46348939,
4.36487098, 5.72428558, 3.64913714, 5.09075194, 5.10808268,
4.53924537, 3.95084051, 5.80156225, 3.95139318, 6.40185987,
5.10152297, 5.9083863 , 6.77868656, 4.93491808, 6.87341423,
5.00020607, 5.37300145, 4.94257872, 4.05335049, 5.65874513,
7.05585192, 3.07188578, 6.47767778, 5.15548659, 6.61809565,
4.63849661, 4.00552794, 7.4990428 , 5.55064212, 3.95348289,
6.77047746, 5.17235527, 4.08495527, 5.69626916, 5.27694599,
6.5455655 , 5.03440539, 3.79535853, 3.88216299, 5.66687272,
5.42595946, 3.98806174, 5.38019678, 4.85600623, 6.60249457,
4.43772189, 7.31528327, 5.45132995, 6.46431198, 5.05253691,
5.30463532, 5.59352554, 4.46884691, 4.4371996 , 5.15883358,
5.31511306, 6.16704347, 6.77732211, 6.10656385, 5.15621815,
5.46977566, 4.51159292, 3.94227825, 5.31667496, 5.67738107,
5.28083731, 6.15320115, 3.42747691, 5.72918766, 5.91724691,
4.83514468, 4.19425714, 4.59213095, 4.23491379, 4.44307898,
4.85002062, 3.68601655, 3.34430793, 4.44669012, 5.21651207,
4.39399604, 6.1542975 , 5.18053303, 4.56594122, 5.82571705,
5.27565334, 5.06206244, 3.30770505, 3.91693396, 6.98565476,
3.4926006 , 5.1485241 , 6.21774637, 4.28257422, 6.52841365,
4.62857032, 4.23136567, 3.82420662, 3.74394523, 5.0192126 ,
5.16862418, 5.14095923, 6.43276907, 5.84756387, 5.30127721,
5.2689874 , 3.70364905, 6.55048716, 6.30709324, 3.52150307,
5.95410199, 5.04133322, 4.25300762, 4.66354312, 5.79064765,
4.40433573, 4.45537903, 5.89482175, 5.87593757, 5.68857767,
4.86160107, 4.19193785, 5.17753232, 3.9977945 , 4.9442369 ,
6.07835905, 5.02438589, 6.22749511, 5.24584004, 3.63975901,
4.68905444, 4.78441954, 6.4812497 , 3.69047389, 4.88141576,
5.79649154, 5.04203019, 5.83245323, 3.9658845 , 3.81575922,
4.46942339, 3.53117414, 4.055562 , 5.39131185, 4.68754333,
4.53087311, 4.20410031, 6.27197988, 6.85397581, 4.85151187,
5.02835855, 5.27851599, 5.17836394, 4.06129462, 4.4375603 ,
7.08798937, 5.5102644 , 3.81629454, 5.56699322, 5.10566699,
4.86234081, 3.8165508 , 4.40874826, 3.41589827, 4.29403851,
2.93109546, 4.71142032, 4.21331257, 3.94113757, 4.79319043,
4.51598912, 5.84379645, 5.3311838 , 5.62069354, 4.56616148,
3.78008828, 4.12170311, 3.47164841, 6.45149467, 7.07439161,
5.15775446, 4.99605674, 3.68084454, 4.81140656, 5.31432729,
5.44268601, 4.28726986, 7.01091418, 3.61892038, 4.94733244,
4.32424272, 4.0247057 , 5.80500818, 5.6972797 , 5.65320886,
6.39972657, 3.3030268 , 5.06080049, 2.94627967, 5.61769648,
4.40227402, 3.50383268, 5.70211839, 4.11933258, 5.08979386,
4.2676073 , 4.36874 , 5.60255516, 4.0650777 , 3.69051336,
6.11521605, 5.13445683, 5.62023694, 5.67238026, 4.62919193,
6.29114967, 5.76445703, 5.33025919, 3.04761616, 5.74038603,
6.90571771, 4.88536199, 4.37548828, 4.03133291, 3.98986754,
5.18311004, 3.60657778, 4.68029741, 6.77247176, 4.05498816,
5.81458552, 5.68999805, 4.49644808, 7.23364798, 4.78154564,
4.30177813, 4.15465457, 4.53973133, 5.27878305, 4.19131211,
3.85860472, 6.69376269, 6.05901324, 5.08240134, 4.48290616,
4.74503704, 4.56161557, 4.76352048, 5.58432118, 6.72418505,
4.58035456, 6.77123094, 5.92590072, 5.92009114, 3.80774064,
7.94398176, 4.16758862, 4.72331011, 5.78362765, 3.21055947,
5.76164498, 6.4397337 , 4.44481261, 3.33294915, 6.01488334,
6.81204549, 2.42044915, 4.4403678 , 5.47945654, 4.07128056,
5.30664905, 6.41306649, 6.01389555, 4.4295987 , 7.10258643,
4.65877207, 4.96947419, 5.99469993, 6.2051869 , 5.70905434,
4.61033172, 5.73426407, 4.35125811, 4.53512318, 3.95150925,
6.78339022, 4.30245366, 5.76650037, 5.15131781, 5.52825882,
6.8477806 , 5.18687599, 4.53926858, 7.22687264, 5.63437343,
5.86334083, 5.63117323, 3.69713011, 6.21429763, 4.22379533,
4.58421616, 4.20044833, 4.88802996, 6.11973732, 4.91362631,
5.99532401, 5.23794912, 4.71085432, 5.65410814, 5.13094576,
4.77431904, 5.86111277, 5.20227685, 5.20100846, 3.51333093,
5.61254166, 5.17714846, 4.34431496, 4.04362843, 5.32914837,
4.45681308, 5.11813353, 6.10079781, 5.20358212, 3.62758645,
5.52902048, 4.01751996, 5.43673173, 4.99834851, 4.23534757,
4.06770858, 5.3580892 , 4.40710573, 5.43241561, 4.60487781,
6.48997503, 3.70168528, 5.23138227, 6.23080636, 4.74835939,
3.80599272, 3.29946934, 6.34557388, 4.93872346, 6.47946449,
6.41594052, 6.73218324, 3.04289943, 4.15115282, 5.10402299,
4.50899173, 4.5168367 , 4.65360808, 6.01393715, 6.4251474 ,
4.46519491, 5.05383961, 4.87180319, 3.27026279, 5.01396013,
4.34003878, 5.76289112, 5.32605534, 4.25412225, 5.12852744,
3.58240371, 3.23632483, 5.32664125, 4.8313264 , 3.84089868,
6.76103641, 4.67086501, 4.974855 , 5.90858545, 4.11203174,
3.47689329, 4.59854682, 3.79011385, 5.67780065, 4.04810594,
5.67560683, 4.41887216, 5.47302958, 3.9217647 , 4.22237868,
5.03075115, 5.40360709, 5.63968412, 5.95374395, 5.32658004,
5.41877689, 4.8850843 , 4.61497329, 4.62041275, 5.04798966,
5.41226352, 4.7350273 , 2.27956637, 3.28210161, 4.90247233,
5.97151358, 3.03264944, 4.59598116, 5.61863583, 5.10906549,
5.896816 , 5.0875292 , 6.55366492, 6.06902742, 4.61753771,
4.35189183, 4.70411494, 6.3450411 , 4.79457834, 4.91535206])
# Join the two sample arrays end to end into a single array, then display it.
nd3 = np.concatenate((nd1, nd2), axis=0)
nd3
输出
array([16.50002167, 21.95871306, 22.75958416, …, 6.3450411 ,
4.79457834, 4.91535206])
# Wrap the combined array in a Series so the pandas plotting helpers can be used.
s8 = Series(data=nd3)
s8
输出
0 16.500022
1 21.958713
2 22.759584
3 17.311623
4 21.113620
5 18.265816
6 14.367717
7 14.041937
8 24.472507
9 12.154959
10 9.272915
11 12.313423
12 10.209241
13 26.501067
14 15.738136
15 15.967169
16 16.691080
17 17.470095
18 23.255858
19 3.750134
20 25.121658
21 9.213113
22 20.805368
23 15.541248
24 28.903056
25 20.532467
26 11.801656
27 21.608022
28 11.175858
29 16.327408
…
1970 5.030751
1971 5.403607
1972 5.639684
1973 5.953744
1974 5.326580
1975 5.418777
1976 4.885084
1977 4.614973
1978 4.620413
1979 5.047990
1980 5.412264
1981 4.735027
1982 2.279566
1983 3.282102
1984 4.902472
1985 5.971514
1986 3.032649
1987 4.595981
1988 5.618636
1989 5.109065
1990 5.896816
1991 5.087529
1992 6.553665
1993 6.069027
1994 4.617538
1995 4.351892
1996 4.704115
1997 6.345041
1998 4.794578
1999 4.915352
Length: 2000, dtype: float64
# Density plot (kernel density estimate) of the combined sample.
s8.plot(kind='kde')
# Histogram with raw counts followed by the KDE curve.
s8.hist(bins=70)
s8.plot(kind='kde')
# How to stop the density curve from lying flat against the x-axis:
# normalise the histogram so it is drawn as a density rather than raw counts.
# `density=True` replaces `normed=True`, which Matplotlib deprecated in 2.1
# and removed in 3.1 (pandas forwards this keyword to Matplotlib's hist).
s8.hist(bins=70, density=True)
## 散布图
# Build a 10-row table of random integer scores in [0, 150), one column per language.
df = DataFrame(
    data=np.random.randint(low=0, high=150, size=(10, 3)),
    columns=['Python', 'Java', 'PHP'],
)
df
Python | Java | PHP | |
---|---|---|---|
0 | 50 | 148 | 144 |
1 | 43 | 104 | 98 |
2 | 16 | 139 | 91 |
3 | 37 | 54 | 79 |
4 | 120 | 111 | 147 |
5 | 2 | 90 | 132 |
6 | 104 | 33 | 130 |
7 | 59 | 32 | 82 |
8 | 39 | 17 | 52 |
9 | 147 | 49 | 29 |
# Derive a 'C++' column that is an exact linear function of 'Python'
# (0.7 * Python + 20), so the scatter plot of the two shows a straight line.
# Vectorised Series arithmetic gives the same values as a per-element
# map(lambda ...) without calling a Python function for every row.
df['C++'] = 0.7 * df['Python'] + 20
df
Python | Java | PHP | C++ | |
---|---|---|---|---|
0 | 50 | 148 | 144 | 55.0 |
1 | 43 | 104 | 98 | 50.1 |
2 | 16 | 139 | 91 | 31.2 |
3 | 37 | 54 | 79 | 45.9 |
4 | 120 | 111 | 147 | 104.0 |
5 | 2 | 90 | 132 | 21.4 |
6 | 104 | 33 | 130 | 92.8 |
7 | 59 | 32 | 82 | 61.3 |
8 | 39 | 17 | 52 | 47.3 |
9 | 147 | 49 | 29 | 122.9 |
散布图 散布图是观察两个一维数据列之间的关系的有效方法
注意是用 kind='scatter',并通过 x、y 参数指定要对比的两列的列标签(columns)
# A scatter plot needs both an x and a y column; it is used to compare the two.
df.plot.scatter(x='Python', y='Java')
df.plot.scatter(x='Python', y='C++')
散布图矩阵:当有多个数据列时,用来展示各列两两之间的关系
函数:pd.plotting.scatter_matrix(),注意参数diagonal:对角线
# Draw the pairwise scatter-plot grid for every column of df.
pd.plotting.scatter_matrix(frame=df, figsize=(12, 9))
array([[,
,
,
],
[,
,
,
],
[,
,
,
],
[,
,
,
]],
dtype=object)
# Same pairwise grid, but with a kernel density estimate on the diagonal.
pd.plotting.scatter_matrix(frame=df, figsize=(12, 9), diagonal='kde')
# A scatter plot lets us look for a relationship between two columns of data.
# 1. With too little data two columns may show no relationship; a relationship
#    can emerge once more data is added.
# 2. More data does not necessarily make the relationship clearer; with a very
#    large amount of data the apparent relationship can change.