import numpy as np
import pandas as pd
# Load only the six columns used in this chapter, then promote Name to the row index.
df = pd.read_csv('learn_pandas.csv',usecols = ['School','Grade', 'Name','Gender','Weight','Transfer'])
df_demo = df.set_index('Name')
df_demo
|
School |
Grade |
Gender |
Weight |
Transfer |
Name |
|
|
|
|
|
Gaopeng Yang |
Shanghai Jiao Tong University |
Freshman |
Female |
46.0 |
N |
Changqiang You |
Peking University |
Freshman |
Male |
70.0 |
N |
Mei Sun |
Shanghai Jiao Tong University |
Senior |
Male |
89.0 |
N |
Xiaojuan Sun |
Fudan University |
Sophomore |
Female |
41.0 |
N |
Gaojuan You |
Fudan University |
Sophomore |
Male |
74.0 |
N |
... |
... |
... |
... |
... |
... |
Xiaojuan Sun |
Fudan University |
Junior |
Female |
46.0 |
N |
Li Zhao |
Tsinghua University |
Senior |
Female |
50.0 |
N |
Chengqiang Chu |
Shanghai Jiao Tong University |
Senior |
Female |
45.0 |
N |
Chengmei Shen |
Shanghai Jiao Tong University |
Senior |
Male |
71.0 |
N |
Chunpeng Lv |
Tsinghua University |
Sophomore |
Male |
51.0 |
N |
200 rows × 5 columns
# Copy the frame and replace its index with the descending integers 200..1,
# to demonstrate loc slicing on a monotonically decreasing index.
df_loc_slice_demo = df_demo.copy()
df_loc_slice_demo.index = range(df_demo.shape[0],0,-1)
df_loc_slice_demo.index
RangeIndex(start=200, stop=0, step=-1)
# loc slicing follows the stored index order, so 5:3 is valid on the descending
# index; both endpoints are included in the result.
df_loc_slice_demo.loc[5:3]
|
School |
Grade |
Gender |
Weight |
Transfer |
5 |
Fudan University |
Junior |
Female |
46.0 |
N |
4 |
Tsinghua University |
Senior |
Female |
50.0 |
N |
3 |
Shanghai Jiao Tong University |
Senior |
Female |
45.0 |
N |
np.random.seed(0)
# Row MultiIndex: Cartesian product of four school labels and the genders found
# in df, giving 8 (School, Gender) keys.
multi_index = pd.MultiIndex.from_product([list('ABCD'),df.Gender.unique()],names=('School', 'Gender'))
multi_index
MultiIndex([('A', 'Female'),
('A', 'Male'),
('B', 'Female'),
('B', 'Male'),
('C', 'Female'),
('C', 'Male'),
('D', 'Female'),
('D', 'Male')],
names=['School', 'Gender'])
# Column MultiIndex: (Indicator, Grade) pairs for Height/Weight across the grades in df.
multi_column=pd.MultiIndex.from_product([['Height','Weight'],df.Grade.unique()],names=('Indicator','Grade'))
multi_column
MultiIndex([('Height', 'Freshman'),
('Height', 'Senior'),
('Height', 'Sophomore'),
('Height', 'Junior'),
('Weight', 'Freshman'),
('Weight', 'Senior'),
('Weight', 'Sophomore'),
('Weight', 'Junior')],
names=['Indicator', 'Grade'])
# Random demo data: heights drawn uniformly from [163, 168), weights ~ N(65, 5^2);
# np.c_ stacks the two 8x4 arrays side by side to match the 8-column MultiIndex.
df_multi = pd.DataFrame(np.c_[(np.random.rand(8,4)*5 + 163).tolist(), (np.random.randn(8,4)*5 + 65).tolist()],
index = multi_index,
columns = multi_column).round(1)
df_multi
|
Indicator |
Height |
Weight |
|
Grade |
Freshman |
Senior |
Sophomore |
Junior |
Freshman |
Senior |
Sophomore |
Junior |
School |
Gender |
|
|
|
|
|
|
|
|
A |
Female |
165.7 |
166.6 |
166.0 |
165.7 |
76.3 |
57.7 |
65.2 |
64.1 |
Male |
165.1 |
166.2 |
165.2 |
167.5 |
72.7 |
72.3 |
65.8 |
66.9 |
B |
Female |
167.8 |
164.9 |
167.0 |
165.6 |
60.6 |
55.1 |
63.3 |
65.8 |
Male |
165.8 |
167.6 |
163.4 |
163.4 |
71.2 |
71.0 |
63.1 |
63.5 |
C |
Female |
163.1 |
167.2 |
166.9 |
167.4 |
59.8 |
57.9 |
56.5 |
74.8 |
Male |
167.9 |
167.0 |
165.3 |
166.9 |
62.5 |
62.8 |
58.7 |
68.9 |
D |
Female |
163.6 |
166.2 |
163.7 |
167.7 |
56.9 |
63.9 |
60.5 |
66.9 |
Male |
165.6 |
165.1 |
164.3 |
166.9 |
62.4 |
59.1 |
64.9 |
67.1 |
3.3 索引的常用方法
3.3.1 索引层的交换和删除
import numpy as np
import pandas as pd
np.random.seed(0)
# Three two-element levels -> an 8-key row MultiIndex (Upper, Lower, Extra).
L1,L2,L3 = ['A','B'],['a','b'],['alpha','beta']
mul_index1 = pd.MultiIndex.from_product([L1,L2,L3], names=('Upper', 'Lower','Extra'))
mul_index1
MultiIndex([('A', 'a', 'alpha'),
('A', 'a', 'beta'),
('A', 'b', 'alpha'),
('A', 'b', 'beta'),
('B', 'a', 'alpha'),
('B', 'a', 'beta'),
('B', 'b', 'alpha'),
('B', 'b', 'beta')],
names=['Upper', 'Lower', 'Extra'])
# Matching 8-key column MultiIndex (Big, Small, Other).
L4,L5,L6 = ['C','D'],['c','d'],['cat','dog']
mul_index2 = pd.MultiIndex.from_product([L4,L5,L6], names=('Big', 'Small', 'Other'))
mul_index2
MultiIndex([('C', 'c', 'cat'),
('C', 'c', 'dog'),
('C', 'd', 'cat'),
('C', 'd', 'dog'),
('D', 'c', 'cat'),
('D', 'c', 'dog'),
('D', 'd', 'cat'),
('D', 'd', 'dog')],
names=['Big', 'Small', 'Other'])
# 8x8 random integers in [-9, 9] indexed by the two MultiIndexes above.
df_ex = pd.DataFrame(np.random.randint(-9,10,(8,8)),index=mul_index1,columns=mul_index2)
df_ex
|
|
Big |
C |
D |
|
|
Small |
c |
d |
c |
d |
|
|
Other |
cat |
dog |
cat |
dog |
cat |
dog |
cat |
dog |
Upper |
Lower |
Extra |
|
|
|
|
|
|
|
|
A |
a |
alpha |
3 |
6 |
-9 |
-6 |
-6 |
-2 |
0 |
9 |
beta |
-5 |
-3 |
3 |
-8 |
-3 |
-2 |
5 |
8 |
b |
alpha |
-4 |
4 |
-1 |
0 |
7 |
-4 |
6 |
6 |
beta |
-9 |
9 |
-6 |
8 |
5 |
-2 |
-9 |
-8 |
B |
a |
alpha |
0 |
-9 |
1 |
-6 |
2 |
9 |
-7 |
-9 |
beta |
-9 |
-5 |
-4 |
-3 |
-1 |
8 |
6 |
-5 |
b |
alpha |
0 |
1 |
-8 |
-8 |
-2 |
0 |
-6 |
-3 |
beta |
2 |
5 |
9 |
-9 |
5 |
-6 |
3 |
1 |
3.4 索引运算
由于集合的元素是互异的,但是索引中可能有相同的元素,先用 unique 去重后再进行运算
# Demo frame whose index 'id1' deliberately contains a duplicate label ('a').
df_set_1 = pd.DataFrame([[0,1],[1,2],[3,4]], index = pd.Index(['a','b','a'],name='id1'))
df_set_1
|
0 |
1 |
id1 |
|
|
a |
0 |
1 |
b |
1 |
2 |
a |
3 |
4 |
# Second demo frame; index 'id2' also contains a duplicate label ('b').
df_set_2 = pd.DataFrame([[4,5],[2,6],[7,1]],index = pd.Index(['b','b','c'],name='id2'))
df_set_2
|
0 |
1 |
id2 |
|
|
b |
4 |
5 |
b |
2 |
6 |
c |
7 |
1 |
# Set operations require unique elements, so deduplicate each index first.
id1, id2 = df_set_1.index.unique(), df_set_2.index.unique()
id1, id2
(Index(['a', 'b'], dtype='object', name='id1'),
Index(['b', 'c'], dtype='object', name='id2'))
# Labels present in both indexes.
id1.intersection(id2)
Index(['b'], dtype='object')
# Labels present in either index.
id1.union(id2)
Index(['a', 'b', 'c'], dtype='object')
# Labels in id1 but not in id2.
id1.difference(id2)
Index(['a'], dtype='object')
# Labels in exactly one of the two indexes.
id1.symmetric_difference(id2)
Index(['a', 'c'], dtype='object')
若两张表需要做集合运算的列并没有被设置索引,一种办法是先转成索引,运算后再恢复,另一种方法是利用isin函数,例如在重置索引的第一张表中选出 id 列交集的所在行:
# Move id1 back to an ordinary column so isin can be used instead of index ops.
df_set_in_col_1 = df_set_1.reset_index()
df_set_in_col_1
|
id1 |
0 |
1 |
0 |
a |
0 |
1 |
1 |
b |
1 |
2 |
2 |
a |
3 |
4 |
# Same for the second table: id2 becomes a regular column.
df_set_in_col_2 = df_set_2.reset_index()
df_set_in_col_2
|
id2 |
0 |
1 |
0 |
b |
4 |
5 |
1 |
b |
2 |
6 |
2 |
c |
7 |
1 |
# Rows of the first table whose id also appears in the second table,
# i.e. the id-column intersection expressed with isin.
df_set_in_col_1[df_set_in_col_1.id1.isin(df_set_in_col_2.id2)]
3.5.1 公司员工数据集
- 分别只使用 query 和 loc 选出年龄不超过四十岁且工作部门为 Dairy 或 Bakery 的男性。
- 选出员工 ID 号为奇数所在行的第 1、第 3 和倒数第 2 列。
- 按照以下步骤进行索引操作:
• 把后三列设为索引后交换内外两层
• 恢复中间一层
• 修改外层索引名为 Gender
• 用下划线合并两层行索引
• 把行索引拆分为原状态
• 修改索引名为原表名称
• 恢复默认索引并将列保持为原表的相对位置
# Exercise 1: the company employee data set.
df = pd.read_csv('company.csv')
df.head(3)
|
EmployeeID |
birthdate_key |
age |
city_name |
department |
job_title |
gender |
0 |
1318 |
1/3/1954 |
61 |
Vancouver |
Executive |
CEO |
M |
1 |
1319 |
1/3/1957 |
58 |
Vancouver |
Executive |
VP Stores |
F |
2 |
1320 |
1/2/1955 |
60 |
Vancouver |
Executive |
Legal Counsel |
F |
# query solution: inside a query string, `col == [list]` is interpreted as
# membership (equivalent to isin).
df.query("(age<=40)&(department == ['Dairy', 'Bakery'])&(gender == 'M')")
|
EmployeeID |
birthdate_key |
age |
city_name |
department |
job_title |
gender |
3611 |
5791 |
1/14/1975 |
40 |
Kelowna |
Dairy |
Dairy Person |
M |
3613 |
5793 |
1/22/1975 |
40 |
Richmond |
Bakery |
Baker |
M |
3615 |
5795 |
1/30/1975 |
40 |
Nanaimo |
Dairy |
Dairy Person |
M |
3617 |
5797 |
2/3/1975 |
40 |
Nanaimo |
Dairy |
Dairy Person |
M |
3618 |
5798 |
2/4/1975 |
40 |
Surrey |
Dairy |
Dairy Person |
M |
... |
... |
... |
... |
... |
... |
... |
... |
6108 |
8307 |
10/20/1994 |
21 |
Burnaby |
Dairy |
Dairy Person |
M |
6113 |
8312 |
11/12/1994 |
21 |
Burnaby |
Dairy |
Dairy Person |
M |
6137 |
8336 |
12/31/1994 |
21 |
Vancouver |
Dairy |
Dairy Person |
M |
6270 |
6312 |
5/14/1979 |
36 |
Grand Forks |
Dairy |
Dairy Person |
M |
6271 |
6540 |
2/14/1981 |
34 |
Victoria |
Bakery |
Baker |
M |
441 rows × 7 columns
# Equivalent loc solution built from boolean masks combined with &
# (each comparison must be parenthesized because & binds tighter than ==/<=).
df.loc[(df.age<=40)& (df.department.isin(['Dairy', 'Bakery']))&(df.gender == 'M')]
|
EmployeeID |
birthdate_key |
age |
city_name |
department |
job_title |
gender |
3611 |
5791 |
1/14/1975 |
40 |
Kelowna |
Dairy |
Dairy Person |
M |
3613 |
5793 |
1/22/1975 |
40 |
Richmond |
Bakery |
Baker |
M |
3615 |
5795 |
1/30/1975 |
40 |
Nanaimo |
Dairy |
Dairy Person |
M |
3617 |
5797 |
2/3/1975 |
40 |
Nanaimo |
Dairy |
Dairy Person |
M |
3618 |
5798 |
2/4/1975 |
40 |
Surrey |
Dairy |
Dairy Person |
M |
... |
... |
... |
... |
... |
... |
... |
... |
6108 |
8307 |
10/20/1994 |
21 |
Burnaby |
Dairy |
Dairy Person |
M |
6113 |
8312 |
11/12/1994 |
21 |
Burnaby |
Dairy |
Dairy Person |
M |
6137 |
8336 |
12/31/1994 |
21 |
Vancouver |
Dairy |
Dairy Person |
M |
6270 |
6312 |
5/14/1979 |
36 |
Grand Forks |
Dairy |
Dairy Person |
M |
6271 |
6540 |
2/14/1981 |
34 |
Victoria |
Bakery |
Baker |
M |
441 rows × 7 columns
# Boolean mask for odd employee IDs; .values converts the Series to a plain
# array because iloc expects an array-like boolean mask, not a labeled Series.
(df.EmployeeID%2 == 1).values
array([False, True, False, ..., True, False, False])
# The 1st, 3rd and second-to-last columns by position (0-based: 0, 2, -2).
df.iloc[(df.EmployeeID%2 == 1).values, [0,2,-2]]
|
EmployeeID |
age |
job_title |
1 |
1319 |
58 |
VP Stores |
3 |
1321 |
56 |
VP Human Resources |
5 |
1323 |
53 |
Exec Assistant, VP Stores |
6 |
1325 |
51 |
Exec Assistant, Legal Counsel |
8 |
1329 |
48 |
Store Manager |
... |
... |
... |
... |
6276 |
7659 |
26 |
Cashier |
6277 |
7741 |
25 |
Cashier |
6278 |
7801 |
25 |
Dairy Person |
6280 |
8181 |
22 |
Cashier |
6281 |
8223 |
21 |
Cashier |
3126 rows × 3 columns
# Step 1: set the last three columns (department, job_title, gender) as the row
# index, then swap the outermost and innermost of the three levels.
df_new = df.copy()
df_new = df_new.set_index(df_new.columns[-3:].tolist()).swaplevel(0,2,axis = 0)
df_new
|
|
|
EmployeeID |
birthdate_key |
age |
city_name |
gender |
job_title |
department |
|
|
|
|
M |
CEO |
Executive |
1318 |
1/3/1954 |
61 |
Vancouver |
F |
VP Stores |
Executive |
1319 |
1/3/1957 |
58 |
Vancouver |
Legal Counsel |
Executive |
1320 |
1/2/1955 |
60 |
Vancouver |
M |
VP Human Resources |
Executive |
1321 |
1/2/1959 |
56 |
Vancouver |
VP Finance |
Executive |
1322 |
1/9/1958 |
57 |
Vancouver |
... |
... |
... |
... |
... |
... |
... |
F |
Cashier |
Customer Service |
8036 |
8/9/1992 |
23 |
New Westminister |
M |
Cashier |
Customer Service |
8181 |
9/26/1993 |
22 |
Prince George |
Customer Service |
8223 |
2/11/1994 |
21 |
Trail |
F |
Cashier |
Customer Service |
8226 |
2/16/1994 |
21 |
Victoria |
Customer Service |
8264 |
6/13/1994 |
21 |
Vancouver |
6284 rows × 4 columns
# Step 2: restore the middle index level (job_title) back to a column.
df_new = df_new.reset_index(1)
df_new
|
|
job_title |
EmployeeID |
birthdate_key |
age |
city_name |
gender |
department |
|
|
|
|
|
M |
Executive |
CEO |
1318 |
1/3/1954 |
61 |
Vancouver |
F |
Executive |
VP Stores |
1319 |
1/3/1957 |
58 |
Vancouver |
Executive |
Legal Counsel |
1320 |
1/2/1955 |
60 |
Vancouver |
M |
Executive |
VP Human Resources |
1321 |
1/2/1959 |
56 |
Vancouver |
Executive |
VP Finance |
1322 |
1/9/1958 |
57 |
Vancouver |
... |
... |
... |
... |
... |
... |
... |
F |
Customer Service |
Cashier |
8036 |
8/9/1992 |
23 |
New Westminister |
M |
Customer Service |
Cashier |
8181 |
9/26/1993 |
22 |
Prince George |
Customer Service |
Cashier |
8223 |
2/11/1994 |
21 |
Trail |
F |
Customer Service |
Cashier |
8226 |
2/16/1994 |
21 |
Victoria |
Customer Service |
Cashier |
8264 |
6/13/1994 |
21 |
Vancouver |
6284 rows × 5 columns
# Step 3: rename the outer index level from 'gender' to 'Gender'.
# Fix: rename_axis returns a new frame, so the result must be assigned back —
# otherwise the rename required by this step is silently discarded.
df_new = df_new.rename_axis(index={'gender':'Gender'})
|
|
job_title |
EmployeeID |
birthdate_key |
age |
city_name |
Gender |
department |
|
|
|
|
|
M |
Executive |
CEO |
1318 |
1/3/1954 |
61 |
Vancouver |
F |
Executive |
VP Stores |
1319 |
1/3/1957 |
58 |
Vancouver |
Executive |
Legal Counsel |
1320 |
1/2/1955 |
60 |
Vancouver |
M |
Executive |
VP Human Resources |
1321 |
1/2/1959 |
56 |
Vancouver |
Executive |
VP Finance |
1322 |
1/9/1958 |
57 |
Vancouver |
... |
... |
... |
... |
... |
... |
... |
F |
Customer Service |
Cashier |
8036 |
8/9/1992 |
23 |
New Westminister |
M |
Customer Service |
Cashier |
8181 |
9/26/1993 |
22 |
Prince George |
Customer Service |
Cashier |
8223 |
2/11/1994 |
21 |
Trail |
F |
Customer Service |
Cashier |
8226 |
2/16/1994 |
21 |
Victoria |
Customer Service |
Cashier |
8264 |
6/13/1994 |
21 |
Vancouver |
6284 rows × 5 columns
# Step 4: merge the two remaining index levels into a single flat index
# joined with '_'.
df_new.index = df_new.index.map(lambda x: '_'.join(x))
df_new
|
job_title |
EmployeeID |
birthdate_key |
age |
city_name |
M_Executive |
CEO |
1318 |
1/3/1954 |
61 |
Vancouver |
F_Executive |
VP Stores |
1319 |
1/3/1957 |
58 |
Vancouver |
F_Executive |
Legal Counsel |
1320 |
1/2/1955 |
60 |
Vancouver |
M_Executive |
VP Human Resources |
1321 |
1/2/1959 |
56 |
Vancouver |
M_Executive |
VP Finance |
1322 |
1/9/1958 |
57 |
Vancouver |
... |
... |
... |
... |
... |
... |
F_Customer Service |
Cashier |
8036 |
8/9/1992 |
23 |
New Westminister |
M_Customer Service |
Cashier |
8181 |
9/26/1993 |
22 |
Prince George |
M_Customer Service |
Cashier |
8223 |
2/11/1994 |
21 |
Trail |
F_Customer Service |
Cashier |
8226 |
2/16/1994 |
21 |
Victoria |
F_Customer Service |
Cashier |
8264 |
6/13/1994 |
21 |
Vancouver |
6284 rows × 5 columns
# Step 5: split the merged labels back into a two-level MultiIndex.
# NOTE(review): this assumes no original label itself contains '_' — confirm.
df_new.index = df_new.index.map(lambda x:tuple(x.split('_')))
df_new
|
|
job_title |
EmployeeID |
birthdate_key |
age |
city_name |
M |
Executive |
CEO |
1318 |
1/3/1954 |
61 |
Vancouver |
F |
Executive |
VP Stores |
1319 |
1/3/1957 |
58 |
Vancouver |
Executive |
Legal Counsel |
1320 |
1/2/1955 |
60 |
Vancouver |
M |
Executive |
VP Human Resources |
1321 |
1/2/1959 |
56 |
Vancouver |
Executive |
VP Finance |
1322 |
1/9/1958 |
57 |
Vancouver |
... |
... |
... |
... |
... |
... |
... |
F |
Customer Service |
Cashier |
8036 |
8/9/1992 |
23 |
New Westminister |
M |
Customer Service |
Cashier |
8181 |
9/26/1993 |
22 |
Prince George |
Customer Service |
Cashier |
8223 |
2/11/1994 |
21 |
Trail |
F |
Customer Service |
Cashier |
8226 |
2/16/1994 |
21 |
Victoria |
Customer Service |
Cashier |
8264 |
6/13/1994 |
21 |
Vancouver |
6284 rows × 5 columns
# Step 6: restore the original level names.
# Fix: the result of rename_axis must be assigned back to df_new; without the
# assignment the index levels stay unnamed, so the later
# reset_index().reindex(df.columns, ...) cannot find 'gender'/'department' and
# fills those columns with NaN (as the recorded output below shows).
df_new = df_new.rename_axis(index = ['gender', 'department'])
|
|
job_title |
EmployeeID |
birthdate_key |
age |
city_name |
gender |
department |
|
|
|
|
|
M |
Executive |
CEO |
1318 |
1/3/1954 |
61 |
Vancouver |
F |
Executive |
VP Stores |
1319 |
1/3/1957 |
58 |
Vancouver |
Executive |
Legal Counsel |
1320 |
1/2/1955 |
60 |
Vancouver |
M |
Executive |
VP Human Resources |
1321 |
1/2/1959 |
56 |
Vancouver |
Executive |
VP Finance |
1322 |
1/9/1958 |
57 |
Vancouver |
... |
... |
... |
... |
... |
... |
... |
F |
Customer Service |
Cashier |
8036 |
8/9/1992 |
23 |
New Westminister |
M |
Customer Service |
Cashier |
8181 |
9/26/1993 |
22 |
Prince George |
Customer Service |
Cashier |
8223 |
2/11/1994 |
21 |
Trail |
F |
Customer Service |
Cashier |
8226 |
2/16/1994 |
21 |
Victoria |
Customer Service |
Cashier |
8264 |
6/13/1994 |
21 |
Vancouver |
6284 rows × 5 columns
# Step 7: back to the default RangeIndex, with the columns reordered to the
# original table's relative positions.
# Fix: assign the result back so the final df_new.equals(df) check compares the
# restored table rather than the still-indexed intermediate frame.
df_new = df_new.reset_index().reindex(df.columns, axis=1)
|
EmployeeID |
birthdate_key |
age |
city_name |
department |
job_title |
gender |
0 |
1318 |
1/3/1954 |
61 |
Vancouver |
NaN |
CEO |
NaN |
1 |
1319 |
1/3/1957 |
58 |
Vancouver |
NaN |
VP Stores |
NaN |
2 |
1320 |
1/2/1955 |
60 |
Vancouver |
NaN |
Legal Counsel |
NaN |
3 |
1321 |
1/2/1959 |
56 |
Vancouver |
NaN |
VP Human Resources |
NaN |
4 |
1322 |
1/9/1958 |
57 |
Vancouver |
NaN |
VP Finance |
NaN |
... |
... |
... |
... |
... |
... |
... |
... |
6279 |
8036 |
8/9/1992 |
23 |
New Westminister |
NaN |
Cashier |
NaN |
6280 |
8181 |
9/26/1993 |
22 |
Prince George |
NaN |
Cashier |
NaN |
6281 |
8223 |
2/11/1994 |
21 |
Trail |
NaN |
Cashier |
NaN |
6282 |
8226 |
2/16/1994 |
21 |
Victoria |
NaN |
Cashier |
NaN |
6283 |
8264 |
6/13/1994 |
21 |
Vancouver |
NaN |
Cashier |
NaN |
6284 rows × 7 columns
# Final check: False here because the rename_axis / reset_index results above
# were not assigned back to df_new; once those steps are persisted this
# round-trip should reproduce the original table.
df_new.equals(df)
False
- 把列索引名中的\n替换为空格。
- 巧克力Rating评分为1至5,每0.25分一档,请选出2.75分及以下且可可含量 Cocoa Percent高于中位数的样本。
- 将Review Date和Company Location设为索引后,选出Review Date在2012年之后且Company Location不属于France, Canada, Amsterdam, Belgium的样本。
# Exercise 2: chocolate ratings. The raw column names contain embedded
# newlines; replace them with single spaces.
dfc = pd.read_csv('chocolate.csv')
dfc.columns = [' '.join(i.split('\n')) for i in dfc.columns]
dfc
|
Company |
Review Date |
Cocoa Percent |
Company Location |
Rating |
0 |
A. Morin |
2016 |
63% |
France |
3.75 |
1 |
A. Morin |
2015 |
70% |
France |
2.75 |
2 |
A. Morin |
2015 |
70% |
France |
3.00 |
3 |
A. Morin |
2015 |
70% |
France |
3.50 |
4 |
A. Morin |
2015 |
70% |
France |
3.50 |
... |
... |
... |
... |
... |
... |
1790 |
Zotter |
2011 |
70% |
Austria |
3.75 |
1791 |
Zotter |
2011 |
65% |
Austria |
3.00 |
1792 |
Zotter |
2011 |
65% |
Austria |
3.50 |
1793 |
Zotter |
2011 |
62% |
Austria |
3.25 |
1794 |
Zotter |
2010 |
65% |
Austria |
3.00 |
1795 rows × 5 columns
# Convert '70%'-style strings to fractions, then filter with query.
# Backtick quoting lets query reference column names containing spaces.
dfc['Cocoa Percent'] = dfc['Cocoa Percent'].apply(lambda x: float(x[:-1])/100)
dfc.query('Rating <=2.75 & `Cocoa Percent` > `Cocoa Percent`.median()').head()
|
Company |
Review Date |
Cocoa Percent |
Company Location |
Rating |
33 |
Akesson's (Pralus) |
2010 |
0.75 |
Switzerland |
2.75 |
34 |
Akesson's (Pralus) |
2010 |
0.75 |
Switzerland |
2.75 |
36 |
Alain Ducasse |
2014 |
0.75 |
France |
2.75 |
38 |
Alain Ducasse |
2013 |
0.75 |
France |
2.50 |
39 |
Alain Ducasse |
2013 |
0.75 |
France |
2.50 |
# The same filter expressed with boolean masks instead of query.
dfc[(dfc['Rating'] <=2.75) & (dfc['Cocoa Percent'] > dfc['Cocoa Percent'].median())]
|
Company |
Review Date |
Cocoa Percent |
Company Location |
Rating |
33 |
Akesson's (Pralus) |
2010 |
0.75 |
Switzerland |
2.75 |
34 |
Akesson's (Pralus) |
2010 |
0.75 |
Switzerland |
2.75 |
36 |
Alain Ducasse |
2014 |
0.75 |
France |
2.75 |
38 |
Alain Ducasse |
2013 |
0.75 |
France |
2.50 |
39 |
Alain Ducasse |
2013 |
0.75 |
France |
2.50 |
... |
... |
... |
... |
... |
... |
1736 |
Wilkie's Organic |
2013 |
0.89 |
Ireland |
2.75 |
1738 |
Wilkie's Organic |
2013 |
0.75 |
Ireland |
2.75 |
1741 |
Willie's Cacao |
2013 |
1.00 |
U.K. |
2.25 |
1769 |
Zart Pralinen |
2016 |
0.85 |
Austria |
2.75 |
1778 |
Zotter |
2014 |
0.80 |
Austria |
2.75 |
239 rows × 5 columns
# Set (Review Date, Company Location) as the index and sort by the first level —
# sorting is required for the level-0 slice with IndexSlice below to be valid.
idx = pd.IndexSlice
dfc= dfc.set_index(['Review Date', 'Company Location']).sort_index(level=0)
dfc.head()
|
|
Company |
Cocoa Percent |
Rating |
Review Date |
Company Location |
|
|
|
2006 |
Belgium |
Cote d' Or (Kraft) |
0.70 |
1.00 |
Belgium |
Dolfin (Belcolade) |
0.70 |
1.50 |
Belgium |
Neuhaus (Callebaut) |
0.73 |
2.00 |
Belgium |
Neuhaus (Callebaut) |
0.75 |
2.75 |
Belgium |
Neuhaus (Callebaut) |
0.71 |
3.00 |
# Rows from 2012 onward whose location is outside the excluded list;
# Index.difference keeps only the level-1 values not in the given list.
# NOTE(review): 2012: includes 2012 itself — use 2013: if "after 2012" is exclusive.
dfc.loc[idx[2012:, dfc.index.get_level_values(1).difference(['France', 'Canada', 'Amsterdam', 'Belgium'])], :]
|
|
Company |
Cocoa Percent |
Rating |
Review Date |
Company Location |
|
|
|
2012 |
Australia |
Bahen & Co. |
0.70 |
3.00 |
Australia |
Bahen & Co. |
0.70 |
2.50 |
Australia |
Bahen & Co. |
0.70 |
2.50 |
Australia |
Cravve |
0.75 |
3.25 |
Australia |
Cravve |
0.65 |
3.25 |
... |
... |
... |
... |
... |
2017 |
U.S.A. |
Spencer |
0.70 |
3.75 |
U.S.A. |
Spencer |
0.70 |
3.50 |
U.S.A. |
Spencer |
0.70 |
2.75 |
U.S.A. |
Xocolla |
0.70 |
2.75 |
U.S.A. |
Xocolla |
0.70 |
2.50 |
972 rows × 3 columns