pandas
官网 :pandas is an open source, BSD-licensed library providing high-performance, easy-to-use data structures and data analysis tools for the Python programming language.
- 常用数据类型
- Series 一维, 带标签的数组
- DataFrame 二维, Series 容器
import pandas as pd
import string
import numpy as np
# Build a 10-element Series holding 0..9, labelled with the letters 'A'..'J'.
labels = list(string.ascii_uppercase[:10])
t = pd.Series(np.arange(10), index=labels)
t
A 0
B 1
C 2
D 3
E 4
F 5
G 6
H 7
I 8
J 9
dtype: int64
type(t)
pandas.core.series.Series
# Map the first ten uppercase letters to their positions: {'A': 0, ..., 'J': 9}.
a = {letter: position for position, letter in enumerate(string.ascii_uppercase[:10])}
a
{'A': 0,
'B': 1,
'C': 2,
'D': 3,
'E': 4,
'F': 5,
'G': 6,
'H': 7,
'I': 8,
'J': 9}
pd.Series(a)
A 0
B 1
C 2
D 3
E 4
F 5
G 6
H 7
I 8
J 9
dtype: int64
# Select labels 'F'..'O' from the dict. Labels that are not keys of `a` ('K'..'O')
# come back as NaN, and because NaN is a float the dtype becomes float64.
wanted_labels = list(string.ascii_uppercase[5:15])
pd.Series(a, index=wanted_labels)
F 5.0
G 6.0
H 7.0
I 8.0
J 9.0
K NaN
L NaN
M NaN
N NaN
O NaN
dtype: float64
t
A 0
B 1
C 2
D 3
E 4
F 5
G 6
H 7
I 8
J 9
dtype: int64
t[2:10:2]
C 2
E 4
G 6
I 8
dtype: int64
# Positional access to the second element. `t` has string labels, so `t[1]` relies on
# the deprecated integer-position fallback of Series.__getitem__; .iloc states the
# intent explicitly and keeps working on newer pandas.
t.iloc[1]
1
# Positional access to several elements. A list of ints on a string-labelled Series
# uses the same deprecated positional fallback as a scalar int, so use .iloc.
t.iloc[[2, 3, 6]]
C 2
D 3
G 6
dtype: int64
t[t>4]
F 5
G 6
H 7
I 8
J 9
dtype: int64
t["F"]
5
# Label lookup where one label ("g") does not exist in the index.
# `t[["A", "F", "g"]]` only emitted a FutureWarning on old pandas and raises KeyError
# on current versions; .reindex() is the documented replacement and fills the missing
# label with NaN (which is why the result dtype is float64).
t.reindex(["A", "F", "g"])
A 0.0
F 5.0
g NaN
dtype: float64
t.index
Index(['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J'], dtype='object')
t.values
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
type(t.index)
pandas.core.indexes.base.Index
type(t.values)
numpy.ndarray
pandas之DataFrame
import webbrowser
# Open the TIOBE index page in a web browser; webbrowser.open() returns True here.
link = "https://www.tiobe.com/tiobe-index/"
webbrowser.open(link)
True
# Build a DataFrame from whatever text is currently on the system clipboard
# (presumably a table copied from the page opened above -- confirm before running).
df = pd.read_clipboard()
df
# 3x4 DataFrame holding 0..11, with default integer row and column labels.
grid = np.arange(12).reshape((3, 4))
t = pd.DataFrame(grid)
t
|   | 0 | 1 | 2 | 3 |
|---|---|---|---|---|
| 0 | 0 | 1 | 2 | 3 |
| 1 | 4 | 5 | 6 | 7 |
| 2 | 8 | 9 | 10 | 11 |
- DataFrame对象既有行索引,又有列索引
- 行索引,表明不同行,横向索引,叫index,0轴,axis=0
- 列索引,表明不同列,纵向索引,叫columns,1轴,axis=1
# Same 3x4 data, but with explicit labels: rows 'a'..'c', columns 'W'..'Z'.
row_labels = list(string.ascii_lowercase[:3])
col_labels = list(string.ascii_uppercase[-4:])
t1 = pd.DataFrame(
    np.arange(12).reshape(3, 4),
    index=row_labels,
    columns=col_labels,
)
t1
|   | W | X | Y | Z |
|---|---|---|---|---|
| a | 0 | 1 | 2 | 3 |
| b | 4 | 5 | 6 | 7 |
| c | 8 | 9 | 10 | 11 |
# Build a DataFrame from a dict of equal-length lists: each key becomes a column.
d1 = {
    "name": ["xiaoming", "xiaogang"],
    "age": [20, 22],
    "tel": [10086, 10010],
}
t2 = pd.DataFrame(data=d1)
t2
|   | name | age | tel |
|---|---|---|---|
| 0 | xiaoming | 20 | 10086 |
| 1 | xiaogang | 22 | 10010 |
type(t2)
pandas.core.frame.DataFrame
# Build a DataFrame from a list of dicts: each dict supplies one row.
d2 = [
    {"name": "xiaoming", "age": 33, "tel": 10010},
    {"name": "xiaohong", "tel": 10010},
    {"name": "xiaowang", "age": 19},
]
# Keys that are absent from a row are filled with NaN.
t3 = pd.DataFrame(data=d2)
t3
|   | age | name | tel |
|---|---|---|---|
| 0 | 33.0 | xiaoming | 10010.0 |
| 1 | NaN | xiaohong | 10010.0 |
| 2 | 19.0 | xiaowang | NaN |
属性
t3.index
# Row index (the labels along axis 0)
RangeIndex(start=0, stop=3, step=1)
t3.columns
# Column index (the labels along axis 1)
Index(['age', 'name', 'tel'], dtype='object')
t3.values
# The underlying data as a NumPy ndarray
array([[33.0, 'xiaoming', 10010.0],
[nan, 'xiaohong', 10010.0],
[19.0, 'xiaowang', nan]], dtype=object)
t3.shape
(3, 3)
t3.dtypes
age float64
name object
tel float64
dtype: object
# Number of dimensions
t3.ndim
2
整体情况