import pandas as pd
import numpy as np
from pandas import Series,DataFrame
# Build a DataFrame from a dict of dicts: the outer keys become the columns,
# the inner keys (years) become the row index, and a year that is missing
# for a city shows up as NaN.
dict_city = {
    'shanghai': {2019: 54000, 2020: 56000},
    'beijing': {2018: 66666, 2019: 77777},
}
df_city = DataFrame(dict_city)
print(df_city)
# Recorded output:
#       shanghai  beijing
# 2019   54000.0  77777.0
# 2020   56000.0      NaN
# 2018       NaN  66666.0

print(dict_city)
# Recorded output:
# {'shanghai': {2019: 54000, 2020: 56000}, 'beijing': {2018: 66666, 2019: 77777}}

# insert() modifies df_city in place: the new column is placed at position 2
# (right after 'beijing') and the list is assigned row by row.
df_city.insert(2, 'shenzhen', [60000, 80000, 70000])
print(df_city)
# Recorded output:
#       shanghai  beijing  shenzhen
# 2019   54000.0  77777.0     60000
# 2020   56000.0      NaN     80000
# 2018       NaN  66666.0     70000

# Inserting a column name that already exists is rejected by default;
# allow_duplicates=True lets a second 'shenzhen' column be added.
df_city.insert(2, 'shenzhen', [60000, 80000, 70000], allow_duplicates=True)
print(df_city)
# Recorded output:
#       shanghai  beijing  shenzhen  shenzhen
# 2019   54000.0  77777.0     60000     60000
# 2020   56000.0      NaN     80000     80000
# 2018       NaN  66666.0     70000     70000

# Column selection builds a new frame; the result is not assigned, so
# df_city itself is unchanged afterwards (see the output below).
print(df_city[['shanghai', 'beijing', 'shenzhen']])
print(df_city)
# Recorded output:
#       shanghai  beijing  shenzhen  shenzhen
# 2019   54000.0  77777.0     60000     60000
# 2020   56000.0      NaN     80000     80000
# 2018       NaN  66666.0     70000     70000
# Rebuild the two-city frame from scratch so the index/columns demo below
# starts without the extra 'shenzhen' columns.
dict_city = {
    'shanghai': {2019: 54000, 2020: 56000},
    'beijing': {2018: 66666, 2019: 77777},
}
df_city = DataFrame(dict_city)
print(df_city)
# Recorded output:
#       shanghai  beijing
# 2019   54000.0  77777.0
# 2020   56000.0      NaN
# 2018       NaN  66666.0

# Row labels and column labels are both Index objects.
print(df_city.index)
# Recorded output (older pandas repr; newer releases print
# Index([...], dtype='int64') instead of Int64Index):
# Int64Index([2019, 2020, 2018], dtype='int64')

print(df_city.columns)
# Recorded output:
# Index(['shanghai', 'beijing'], dtype='object')

# Each axis can carry a name, which is shown in the frame's header.
df_city.index.name = '年份'
df_city.columns.name = '城市'
print(df_city)
# Recorded output:
# 城市    shanghai  beijing
# 年份
# 2019   54000.0  77777.0
# 2020   56000.0      NaN
# 2018       NaN  66666.0

print(df_city.index)
# Recorded output:
# Int64Index([2019, 2020, 2018], dtype='int64', name='年份')
# An index name can be attached in two ways: set it afterwards through
# `.index.name`, or build the index explicitly with pd.Index(..., name=...).

# 1) Name the index after the Series has been created.
s = Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])
s.index.name = 'haha'
print(s)
print(s.index)
# Recorded output:
# Index(['a', 'b', 'c', 'd'], dtype='object', name='haha')

# 2) Pass a pre-named pd.Index when constructing the Series.
s2 = Series([2, 4, 5, 6], index=pd.Index(['a', 'b', 'c', 'd'], name='字母'))
print(s2)
# Recorded output:
# 字母
# a    2
# b    4
# c    5
# d    6
# dtype: int64
# pd.date_range(start, end, periods): with the default daily frequency,
# supplying any two of start / end / periods fixes the whole range.

# start + end (both endpoints are included).
by_start_end = pd.date_range('2020-08-01', '2020-08-10')
print(by_start_end)
# Recorded output:
# DatetimeIndex(['2020-08-01', '2020-08-02', '2020-08-03', '2020-08-04',
#                '2020-08-05', '2020-08-06', '2020-08-07', '2020-08-08',
#                '2020-08-09', '2020-08-10'],
#               dtype='datetime64[ns]', freq='D')

# start + number of periods: counts forward from the start date.
by_start_periods = pd.date_range(start='2020-08-05', periods=10)
print(by_start_periods)
# Recorded output:
# DatetimeIndex(['2020-08-05', '2020-08-06', '2020-08-07', '2020-08-08',
#                '2020-08-09', '2020-08-10', '2020-08-11', '2020-08-12',
#                '2020-08-13', '2020-08-14'],
#               dtype='datetime64[ns]', freq='D')

# end + number of periods: counts backward from the end date.
by_end_periods = pd.date_range(end='20200812', periods=20)
print(by_end_periods)
# Recorded output:
# DatetimeIndex(['2020-07-24', '2020-07-25', '2020-07-26', '2020-07-27',
#                '2020-07-28', '2020-07-29', '2020-07-30', '2020-07-31',
#                '2020-08-01', '2020-08-02', '2020-08-03', '2020-08-04',
#                '2020-08-05', '2020-08-06', '2020-08-07', '2020-08-08',
#                '2020-08-09', '2020-08-10', '2020-08-11', '2020-08-12'],
#               dtype='datetime64[ns]', freq='D')
# 550 rows of standard-normal noise in four columns, indexed by consecutive
# calendar days starting 2019-01-01. No random seed is set, so the numbers
# in the recorded outputs below will differ on every run; only the shapes
# and the index labels are reproducible.
data = DataFrame(
    np.random.randn(550, 4),
    index=pd.date_range('2019-01-01', periods=550),
    columns='天,地,玄,黄'.split(','),
)
print(data)
# Recorded output (truncated by the display):
#                    天         地         玄         黄
# 2019-01-01 -1.100782 -0.736156  0.065437 -0.602575
# 2019-01-02  1.296963  1.423391  3.390231 -0.057892
# ...              ...       ...       ...       ...
# 2020-07-02  0.791993  1.205519  0.073791 -1.284437
# 2020-07-03 -1.298447  0.519443 -0.075990 -0.243295
# 550 rows × 4 columns

# A date string that identifies a single day selects that row as a Series.
print(data.loc['20190508'])
# Recorded output:
# 天    0.466288
# 地    0.306702
# 玄   -0.653269
# 黄   -0.401242
# Name: 2019-05-08 00:00:00, dtype: float64

# Slicing with two date strings keeps both endpoints.
print(data.loc['20190508':'20191010'])
# Recorded output (truncated by the display):
#                    天         地         玄         黄
# 2019-05-08  0.466288  0.306702 -0.653269 -0.401242
# 2019-05-09 -1.209363 -0.880524  0.027211  0.860171
# 2019-05-10  1.672365  0.013685 -0.069262  0.592141
# ...              ...       ...       ...       ...
# 2019-10-09 -0.645113 -0.963626  0.376894 -0.752427
# 2019-10-10 -1.149268 -2.190506 -2.008276  0.027101
# 156 rows × 4 columns

# A year-month string selects every row that falls in that month.
print(data.loc['2019-11'])
# Recorded output (truncated by the display):
#                    天         地         玄         黄
# 2019-11-01  0.235139  1.552186 -2.835154 -0.954115
# 2019-11-02  0.331377 -0.399864 -0.976208  0.095694
# 2019-11-03  1.062406 -0.011718  0.651712 -0.985060
# 2019-11-04 -0.680149 -2.013491  0.007301 -0.239927
# 2019-11-05 -0.574458  1.805574 -0.159081 -0.932177
# ...              ...       ...       ...       ...
# 2019-11-27 -0.589245  1.300695 -1.317557 -1.822447
# 2019-11-28 -0.206663  0.652174 -1.413524 -0.967720
# 2019-11-29 -0.556445 -0.155306 -1.440397 -0.151424
# 2019-11-30  0.020656 -0.661730 -0.473776  2.642046