# 7. How do I sort a pandas DataFrame or a Series?
# coding: utf-8
# In[77]:
import pandas as pd
# In[79]:
# Load the movie dataset from the remote CSV into a DataFrame.
movies = pd.read_csv('http://bit.ly/imdbratings')
# In[80]:
# Preview the first five rows.
movies.head(n=5)
# In[83]:
# Sort a single column (a Series) in descending order.
movies['title'].sort_values(ascending=False)
# In[95]:
# Sort the whole DataFrame by one column; ascending=False gives
# descending order.
movies.sort_values(by='star_rating', ascending=False)
# In[94]:
# Sort by several columns: star_rating first, then duration within
# equal star_rating values.
movies.sort_values(by=['star_rating', 'duration'])
# 8. How do I filter rows of a pandas DataFrame by column value?
# coding: utf-8
# In[96]:
import pandas as pd
# In[98]:
movies = pd.read_csv('http://bit.ly/imdbratings')
# In[99]:
movies.head()
# In[100]:
movies.shape
# In[101]:
type(True)
# In[102]:
# Manual approach: build one boolean per row, True when the movie's
# duration is at least 200.
booleans = [length >= 200 for length in movies.duration]
# In[103]:
booleans[0:5]
# In[104]:
# One entry per row, so this matches the number of rows in movies.shape.
len(booleans)
# In[105]:
# The list could be wrapped with pd.Series(booleans) to use it as a mask,
# but the vectorised comparison below produces the boolean Series directly.
# In[108]:
is_long = movies.duration >= 200
is_long.head()
# In[120]:
# Select the genre of every movie whose duration is at least 200.
# movies[is_long] would return all columns of those rows, and
# movies[movies.duration >= 200]['genre'] returns the same values through
# two chained lookups; a single .loc call does the row filter and the
# column selection in one step.
movies.loc[movies.duration >= 200, 'genre']
# 9. How do I apply multiple filter criteria to a pandas DataFrame?
# coding: utf-8
# In[1]:
import pandas as pd
# In[2]:
movies = pd.read_csv('http://bit.ly/imdbratings')
# In[3]:
movies.head()
# In[4]:
# Single criterion: rows whose duration is at least 200.
movies[movies.duration >= 200]
# In[7]:
# AND (&): keep rows where both conditions hold. Each comparison needs its
# own parentheses because & binds more tightly than >= and ==.
movies[(movies.duration >= 200) & (movies.genre == 'Drama')]
# In[8]:
# OR (|): keep rows where at least one of the two conditions holds.
# The two operands must be different criteria; OR-ing a condition with
# itself is the same as applying that condition alone.
movies[(movies.duration >= 200) | (movies.genre == 'Drama')]
# 10. Your pandas questions answered
# coding: utf-8
# In[1]:
import pandas as pd
# In[3]:
ufo = pd.read_csv('http://bit.ly/uforeports')
# In[4]:
ufo.columns
# In[7]:
# Read only a subset of columns, chosen by position (first and fifth).
ufo = pd.read_csv('http://bit.ly/uforeports', usecols=[0, 4])
ufo.columns
# In[8]:
# Read only the first three rows of the file.
ufo = pd.read_csv('http://bit.ly/uforeports', nrows=3)
ufo
# In[10]:
# Iterating over a Series yields its values one at a time.
for c in ufo.City:
    print(c)
# In[11]:
# iterrows() yields (index, row) pairs, where each row is a Series whose
# entries can be read by column name.
for index, row in ufo.iterrows():
    print(index, row.City, row.State)
# In[12]:
# The best way to drop every non-numeric column from a DataFrame
drinks = pd.read_csv('http://bit.ly/drinksbycountry')
# In[13]:
drinks.dtypes
# In[14]:
import numpy as np
# Keep only the columns whose dtype is a subclass of np.number.
drinks.select_dtypes(include=[np.number]).dtypes
# In[15]:
drinks.describe()
# 11. How do I use the 'axis' parameter in pandas?
# coding: utf-8
# In[16]:
import pandas as pd
# In[17]:
drinks = pd.read_csv('http://bit.ly/drinksbycountry')
# In[18]:
drinks.head()
# In[19]:
# axis=1 refers to columns: drop the column labelled 'continent'.
drinks.drop('continent', axis=1).head()
# In[20]:
# axis=0 refers to rows: drop the row labelled 2.
drinks.drop(2, axis=0).head()
# In[22]:
# mean() defaults to axis=0, i.e. one mean per column.
# numeric_only=True restricts the calculation to numeric columns; without
# it, pandas >= 2.0 raises TypeError when the frame has string columns.
drinks.mean(numeric_only=True)
# In[23]:
# axis=1 computes one mean per row, across the numeric columns.
drinks.mean(axis=1, numeric_only=True)
# In[25]:
drinks.mean(axis=0, numeric_only=True).shape
# 12. How do I use string methods in pandas?
# coding: utf-8
# In[26]:
'hello'.upper()
# In[27]:
import pandas as pd
# In[28]:
orders = pd.read_table('http://bit.ly/chiporders')
# In[30]:
orders.head()
# In[35]:
# Strip the square brackets from choice_description. '[' and ']' are regex
# metacharacters and the default of `regex` differs between pandas
# versions, so request literal matching explicitly.
orders.choice_description.str.replace('[', '', regex=False).str.replace(']', '', regex=False)
# In[31]:
# Keep only the rows whose item_name contains the substring 'Chicken'.
orders[orders.item_name.str.contains('Chicken')]