导入数据
import pandas as pd
# Load the meal order detail workbook into a DataFrame (kept under the
# original global name `date`, which the rest of the script relies on).
date = pd.read_excel('meal_order_detail.xlsx')
# Show the underlying values of the frame as an array.
print('数据的所有值为:\n', date.values)
数据的所有值为:
[[2956 417 610062 ... nan 'caipu/104001.jpg' 1442]
[2958 417 609957 ... nan 'caipu/202003.jpg' 1442]
[2961 417 609950 ... nan 'caipu/303001.jpg' 1442]
...
[6756 774 609949 ... nan 'caipu/404005.jpg' 1138]
[6763 774 610014 ... nan 'caipu/302003.jpg' 1138]
[6764 774 610017 ... nan 'caipu/302006.jpg' 1138]]
# Show the column labels of the frame.
print('数据列名为:\n', date.columns)
数据列名为:
Index(['detail_id', 'order_id', 'dishes_id', 'logicprn_name',
'parent_class_name', 'dishes_name', 'itemis_add', 'counts', 'amounts',
'cost', 'place_order_time', 'discount_amt', 'discount_reason',
'kick_back', 'add_inprice', 'add_info', 'bar_code', 'picture_file',
'emp_id'],
dtype='object')
# Show the dtype of every column.
print('数据的所有数据类型为:\n', date.dtypes)
数据的所有数据类型为:
detail_id int64
order_id int64
dishes_id int64
logicprn_name float64
parent_class_name float64
dishes_name object
itemis_add int64
counts int64
amounts int64
cost float64
place_order_time datetime64[ns]
discount_amt float64
discount_reason float64
kick_back float64
add_inprice int64
add_info float64
bar_code float64
picture_file object
emp_id int64
dtype: object
# Basic geometry of the frame: total element count, number of dimensions,
# and the (rows, columns) shape.
print('数据的元素个数为:\n', date.size)
print('数据的维度为\n', date.ndim)
print('数据的形状为:\n', date.shape)
数据的元素个数为:
52801
数据的维度为
2
数据的形状为:
(2779, 19)
# Access several rows of a single column: the first five dish names.
dishes_name5 = date['dishes_name'].head(5)
print('dishes的前五个元素为:\n', dishes_name5)
dishes的前五个元素为:
0 蒜蓉生蚝
1 蒙古烤羊腿\r\n\r\n\r\n
2 大蒜苋菜
3 芝麻烤紫菜
4 蒜香包
Name: dishes_name, dtype: object
# Access several full rows: the first six rows with every column.
# (The variable keeps its original name even though it holds six rows.)
order5 = date.iloc[:6]
print('访问数据前6行所有的数据:\n', order5)
访问数据前6行所有的数据:
detail_id order_id dishes_id logicprn_name parent_class_name \
0 2956 417 610062 NaN NaN
1 2958 417 609957 NaN NaN
2 2961 417 609950 NaN NaN
3 2966 417 610038 NaN NaN
4 2968 417 610003 NaN NaN
5 1899 301 610019 NaN NaN
dishes_name itemis_add counts amounts cost place_order_time \
0 蒜蓉生蚝 0 1 49 NaN 2016-08-01 11:05:36
1 蒙古烤羊腿\r\n\r\n\r\n 0 1 48 NaN 2016-08-01 11:07:07
2 大蒜苋菜 0 1 30 NaN 2016-08-01 11:07:40
3 芝麻烤紫菜 0 1 25 NaN 2016-08-01 11:11:11
4 蒜香包 0 1 13 NaN 2016-08-01 11:11:30
5 白斩鸡 0 1 88 NaN 2016-08-01 11:15:57
discount_amt discount_reason kick_back add_inprice add_info bar_code \
0 NaN NaN NaN 0 NaN NaN
1 NaN NaN NaN 0 NaN NaN
2 NaN NaN NaN 0 NaN NaN
3 NaN NaN NaN 0 NaN NaN
4 NaN NaN NaN 0 NaN NaN
5 NaN NaN NaN 0 NaN NaN
picture_file emp_id
0 caipu/104001.jpg 1442
1 caipu/202003.jpg 1442
2 caipu/303001.jpg 1442
3 caipu/105002.jpg 1442
4 caipu/503002.jpg 1442
5 caipu/204002.jpg 1095
# head() and tail() return the first and last rows of the frame; both
# default to five rows, spelled out explicitly here.
print('数据前五行:\n', date.head(5))
print('数据后五行:\n', date.tail(5))
数据前五行:
detail_id order_id dishes_id logicprn_name parent_class_name \
0 2956 417 610062 NaN NaN
1 2958 417 609957 NaN NaN
2 2961 417 609950 NaN NaN
3 2966 417 610038 NaN NaN
4 2968 417 610003 NaN NaN
dishes_name itemis_add counts amounts cost place_order_time \
0 蒜蓉生蚝 0 1 49 NaN 2016-08-01 11:05:36
1 蒙古烤羊腿\r\n\r\n\r\n 0 1 48 NaN 2016-08-01 11:07:07
2 大蒜苋菜 0 1 30 NaN 2016-08-01 11:07:40
3 芝麻烤紫菜 0 1 25 NaN 2016-08-01 11:11:11
4 蒜香包 0 1 13 NaN 2016-08-01 11:11:30
discount_amt discount_reason kick_back add_inprice add_info bar_code \
0 NaN NaN NaN 0 NaN NaN
1 NaN NaN NaN 0 NaN NaN
2 NaN NaN NaN 0 NaN NaN
3 NaN NaN NaN 0 NaN NaN
4 NaN NaN NaN 0 NaN NaN
picture_file emp_id
0 caipu/104001.jpg 1442
1 caipu/202003.jpg 1442
2 caipu/303001.jpg 1442
3 caipu/105002.jpg 1442
4 caipu/503002.jpg 1442
数据后五行:
detail_id order_id dishes_id logicprn_name parent_class_name \
2774 6750 774 610011 NaN NaN
2775 6742 774 609996 NaN NaN
2776 6756 774 609949 NaN NaN
2777 6763 774 610014 NaN NaN
2778 6764 774 610017 NaN NaN
dishes_name itemis_add counts amounts cost place_order_time \
2774 白饭/大碗 0 1 10 NaN 2016-08-10 21:56:24
2775 牛尾汤 0 1 40 NaN 2016-08-10 21:56:48
2776 意文柠檬汁 0 1 13 NaN 2016-08-10 22:01:52
2777 金玉良缘 0 1 30 NaN 2016-08-10 22:03:58
2778 酸辣藕丁 0 1 33 NaN 2016-08-10 22:04:30
discount_amt discount_reason kick_back add_inprice add_info \
2774 NaN NaN NaN 0 NaN
2775 NaN NaN NaN 0 NaN
2776 NaN NaN NaN 0 NaN
2777 NaN NaN NaN 0 NaN
2778 NaN NaN NaN 0 NaN
bar_code picture_file emp_id
2774 NaN caipu/601005.jpg 1138
2775 NaN caipu/201006.jpg 1138
2776 NaN caipu/404005.jpg 1138
2777 NaN caipu/302003.jpg 1138
2778 NaN caipu/302006.jpg 1138
# loc slices by index/column labels, while iloc slices by integer position.
# The original author notes that loc works in every scenario and reads better.
# Multi-column slicing with loc and iloc: both select the same two columns.
order_loc = date.loc[:, ['detail_id', 'order_id']]
print('loc多行切片:\n', order_loc.shape)
# Positions 0 and 1 are 'detail_id' and 'order_id' (see the column listing),
# so this now matches the loc selection above; the original used [1, 2],
# which picked 'order_id' and 'dishes_id' instead.
order_iloc = date.iloc[:, [0, 1]]
# NOTE(review): the labels say "多行" (multi-row) although columns are being
# sliced; the strings are left untouched so they match the recorded output.
print('iloc多行切片:\n', order_iloc.shape)
loc多行切片:
(2779, 2)
iloc多行切片:
(2779, 2)
# Conditional slicing with loc: keep rows whose order_id equals 417 and
# project them onto the order_id and dishes_name columns.
order_id_417 = date.loc[date['order_id'] == 417, ['order_id', 'dishes_name']]
# Print the variable assigned above; the original referenced the undefined
# name `order_id_369`, which raised NameError.
print('使用loc筛选order_id等于417的菜品:\n', order_id_417)
使用loc筛选order_id等于417的菜品:
order_id dishes_name
0 417 蒜蓉生蚝
1 417 蒙古烤羊腿\r\n\r\n\r\n
2 417 大蒜苋菜
3 417 芝麻烤紫菜
4 417 蒜香包
# Conditional slicing with iloc: the boolean Series is converted to a plain
# array via .values before being passed to iloc; columns 1 and 5 are
# order_id and dishes_name.
order_id_417 = date.iloc[date['order_id'].eq(417).values, [1, 5]]
print('使用iloc筛选order_id等于417的菜品:\n', order_id_417)
使用iloc筛选order_id等于417的菜品:
order_id dishes_name
0 417 蒜蓉生蚝
1 417 蒙古烤羊腿\r\n\r\n\r\n
2 417 大蒜苋菜
3 417 芝麻烤紫菜
4 417 蒜香包
# Modify values in the DataFrame in place through a loc selection:
# every order_id equal to 417 is overwritten with 666.
date.loc[date['order_id'].eq(417), 'order_id'] = 666
# After the assignment no row should match 417 any more ...
print('将id=417该为666后id=417的数据为:\n',
      date.loc[date['order_id'].eq(417), 'order_id'])
# ... and the affected rows now show up under 666.
print('将id=417该为666后id=666的数据为:\n',
      date.loc[date['order_id'].eq(666), 'order_id'])
将id=417该为666后id=417的数据为:
Series([], Name: order_id, dtype: int64)
将id=417该为666后id=666的数据为:
0 666
1 666
2 666
3 666
4 666
Name: order_id, dtype: int64
# Add a new column to the DataFrame: payment = counts * amounts per row.
print('counts和amounts前五行数据为:\n', date[['counts', 'amounts']].head())
date['payment'] = date['counts'].mul(date['amounts'])
print('增加列属性payment:\n', date[['counts', 'amounts', 'payment']].head())
counts和amounts前五行数据为:
counts amounts
0 1 49
1 1 48
2 1 30
3 1 25
4 1 13
增加列属性payment:
counts amounts payment
0 1 49 49
1 1 48 48
2 1 30 30
3 1 25 25
4 1 13 13