import pandas as pd
from pandas import DataFrame
import numpy as np
DataFrame是一个表格型的数据结构,既有行索引(保存在index)又有列索引(保存在columns)。
# Column-oriented source data: each key becomes a column label and each list
# holds that column's four values.
dict1 = {
    "Province": ["Guangdong", "Beijing", "Qinghai", "Fujiang"],
    "year": [2018, 2018, 2018, 2018],
    "pop": [1.3, 2.5, 1.1, 0.7],
}
# No index is passed, so the rows get the default integer labels 0..3.
df1 = DataFrame(data=dict1)
df1  # bare expression: shows the frame when run as a notebook cell
代码结果:
| | Province | pop | year |
---|---|---|---|
0 | Guangdong | 1.3 | 2018 |
1 | Beijing | 2.5 | 2018 |
2 | Qinghai | 1.1 | 2018 |
3 | Fujiang | 0.7 | 2018 |
# Same data, but with an explicit column order and string row labels.
# 'debt' is not a key of dict1, so that column is created filled with NaN.
df2 = DataFrame(
    dict1,
    index=['one', 'two', 'three', 'four'],
    columns=['year', 'Province', 'pop', 'debt'],
)
df2  # bare expression: shows the frame when run as a notebook cell
代码结果:
| | year | Province | pop | debt |
---|---|---|---|---|
one | 2018 | Guangdong | 1.3 | NaN |
two | 2018 | Beijing | 2.5 | NaN |
three | 2018 | Qinghai | 1.1 | NaN |
four | 2018 | Fujiang | 0.7 | NaN |
df2
代码结果:
| | year | Province | pop | debt |
---|---|---|---|---|
one | 2018 | Guangdong | 1.3 | NaN |
two | 2018 | Beijing | 2.5 | NaN |
three | 2018 | Qinghai | 1.1 | NaN |
four | 2018 | Fujiang | 0.7 | NaN |
# Give both axes a name: the column axis is labelled 'Province' and the row
# axis 'English'. The names only affect how the frame is displayed/described;
# the 'Province' data column is still selected the same way.
df2.columns.name = 'Province'
df2.index.name = 'English'
df2  # bare expression: shows the frame when run as a notebook cell
代码结果:
Province | year | Province | pop | debt |
---|---|---|---|---|
English | ||||
one | 2018 | Guangdong | 1.3 | NaN |
two | 2018 | Beijing | 2.5 | NaN |
three | 2018 | Qinghai | 1.1 | NaN |
four | 2018 | Fujiang | 0.7 | NaN |
df2.shape
代码结果:
(4, 4)
df2.values
代码结果:
array([[2018, 'Guangdong', 1.3, nan],
[2018, 'Beijing', 2.5, nan],
[2018, 'Qinghai', 1.1, nan],
[2018, 'Fujiang', 0.7, nan]], dtype=object)
df2.Province
代码结果:
English
one Guangdong
two Beijing
three Qinghai
four Fujiang
Name: Province, dtype: object
df2['Province']
代码结果:
English
one Guangdong
two Beijing
three Qinghai
four Fujiang
Name: Province, dtype: object
df2[['Province','pop']]
代码结果:
Province | Province | pop |
---|---|---|
English | ||
one | Guangdong | 1.3 |
two | Beijing | 2.5 |
three | Qinghai | 1.1 |
four | Fujiang | 0.7 |
df2.loc['one']
代码结果:
Province
year 2018
Province Guangdong
pop 1.3
debt NaN
Name: one, dtype: object
df2.loc['one':'three']
代码结果:
Province | year | Province | pop | debt |
---|---|---|---|---|
English | ||||
one | 2018 | Guangdong | 1.3 | NaN |
two | 2018 | Beijing | 2.5 | NaN |
three | 2018 | Qinghai | 1.1 | NaN |
df2.loc['one','Province']
代码结果:
'Guangdong'
# Replace the all-NaN 'debt' column with four evenly spaced values
# (2.0, 2.25, 2.5, 2.75); the stop value 3 itself is excluded by arange.
df2["debt"] = np.arange(2, 3, step=0.25)
df2  # bare expression: shows the frame when run as a notebook cell
代码结果:
Province | year | Province | pop | debt |
---|---|---|---|---|
English | ||||
one | 2018 | Guangdong | 1.3 | 2.00 |
two | 2018 | Beijing | 2.5 | 2.25 |
three | 2018 | Qinghai | 1.1 | 2.50 |
four | 2018 | Fujiang | 0.7 | 2.75 |
# Add a boolean column that is True only on rows whose Province is 'Guangdong'.
df2['eastern'] = df2['Province'].eq('Guangdong')
df2  # bare expression: shows the frame when run as a notebook cell
代码结果:
Province | year | Province | pop | debt | eastern |
---|---|---|---|---|---|
English | |||||
one | 2018 | Guangdong | 1.3 | 2.00 | True |
two | 2018 | Beijing | 2.5 | 2.25 | False |
three | 2018 | Qinghai | 1.1 | 2.50 | False |
four | 2018 | Fujiang | 0.7 | 2.75 | False |
# Remove the 'eastern' column from df2 in place, then show the remaining
# column labels.
del df2['eastern']
df2.columns
代码结果:
Index(['year', 'Province', 'pop', 'debt'], dtype='object', name='Province')
df2.T
代码结果:
English | one | two | three | four |
---|---|---|---|---|
Province | ||||
year | 2018 | 2018 | 2018 | 2018 |
Province | Guangdong | Beijing | Qinghai | Fujiang |
pop | 1.3 | 2.5 | 1.1 | 0.7 |
debt | 2 | 2.25 | 2.5 | 2.75 |
1 二维数组
# Build a frame from a 2-D array: a 4x4 block of random integers drawn from
# [0, 10), with row labels 1-4 and column labels A-D. No seed is set, so the
# values differ from run to run.
df3 = pd.DataFrame(
    np.random.randint(0, 10, size=(4, 4)),
    index=[1, 2, 3, 4],
    columns=list('ABCD'),
)
df3  # bare expression: shows the frame when run as a notebook cell
代码结果:
| | A | B | C | D |
---|---|---|---|---|
1 | 9 | 8 | 4 | 6 |
2 | 5 | 7 | 7 | 4 |
3 | 6 | 3 | 0 | 2 |
4 | 4 | 6 | 9 | 8 |
2 字典
行索引由index决定,列索引由字典的键决定
dict1
代码结果:
{'Province': ['Guangdong', 'Beijing', 'Qinghai', 'Fujiang'],
'pop': [1.3, 2.5, 1.1, 0.7],
'year': [2018, 2018, 2018, 2018]}
# Build a frame from a dict: the dict keys supply the column labels, and the
# explicit index list supplies the row labels 1-4.
df4 = pd.DataFrame(
    dict1,
    index=[1, 2, 3, 4],
)
df4  # bare expression: shows the frame when run as a notebook cell
代码结果:
| | Province | pop | year |
---|---|---|---|
1 | Guangdong | 1.3 | 2018 |
2 | Beijing | 2.5 | 2018 |
3 | Qinghai | 1.1 | 2018 |
4 | Fujiang | 0.7 | 2018 |
3 结构数组
其中列索引由结构数组的字段名决定
# Structured NumPy array with two named fields per record: a byte-string
# field "name" (dtype spec "10S") and an integer field "count".
arr = np.array(
    [('item1', 10), ('item2', 20), ('item3', 30), ('item4', 40)],
    dtype=[("name", "10S"), ("count", int)],
)
# The structured array's field names become the frame's column labels.
df5 = pd.DataFrame(data=arr)
df5  # bare expression: shows the frame when run as a notebook cell
代码结果:
| | name | count |
---|---|---|
0 | b'item1' | 10 |
1 | b'item2' | 20 |
2 | b'item3' | 30 |
3 | b'item4' | 40 |
dict2 = {
    "a": [1, 2, 3],
    "b": [4, 5, 6],
}
# from_dict with its default orientation: each dict key becomes a column.
df6 = pd.DataFrame.from_dict(data=dict2)
df6  # bare expression: shows the frame when run as a notebook cell
代码结果:
| | a | b |
---|---|---|
0 | 1 | 4 |
1 | 2 | 5 |
2 | 3 | 6 |
# orient="index": each dict key becomes a row label instead of a column, so
# the list positions 0..2 become the column labels.
df7 = pd.DataFrame.from_dict(data=dict2, orient="index")
df7  # bare expression: shows the frame when run as a notebook cell
代码结果:
| | 0 | 1 | 2 |
---|---|---|---|
a | 1 | 2 | 3 |
b | 4 | 5 | 6 |
df7.to_dict()
代码结果:
{0: {'a': 1, 'b': 4}, 1: {'a': 2, 'b': 5}, 2: {'a': 3, 'b': 6}}
df7.to_dict(orient="records")
代码结果:
[{0: 1, 1: 2, 2: 3}, {0: 4, 1: 5, 2: 6}]
df7.to_dict(orient="list")
代码结果:
{0: [1, 4], 1: [2, 5], 2: [3, 6]}
谢谢大家的浏览,
希望我的努力能帮助到您,
共勉!