import numpy as np
import pandas as pd
# Load data/dogName.csv (path relative to the working directory) into a DataFrame.
df = pd.read_csv("data/dogName.csv")
# Series: a one-dimensional labeled array, made up of keys (the index) and values.
# Series with the default integer index.
t = pd.Series([1, 2, 3, 4, 5])
# Same values, labeled "a".."e" instead.
t2 = pd.Series([1, 2, 3, 4, 5], index=list("abcde"))
# A dict becomes a Series whose index is the dict's keys.
temp_dict = {"name": "xiaohong", "age": 30, "tel": 10086}
a = pd.Series(temp_dict)
a[:]  # every element
a["tel"]  # lookup by label
a[:2]  # first two elements, by position
# reindex returns NaN for labels that are absent (here "f"). Plain list
# indexing, a[["name", "age", "f"]], raises KeyError for a missing label
# in pandas >= 1.0, so it cannot be used to show the NaN behavior.
a.reindex(["name", "age", "f"])
t[t > 4]  # boolean-mask selection
a.index  # the labels, as an Index object
list(a.index)  # the labels, as a plain list
a.values  # the underlying values as an array
# DataFrame: two-dimensional, with a row index `index` (axis=0) and a column index `columns` (axis=1).
# DataFrame from a 3x4 grid of the integers 0..11, with default labels.
pd.DataFrame(data=np.arange(12).reshape(3, 4))
# The same grid with explicit row labels a-c and column labels X, Y, Z, W.
pd.DataFrame(
    data=np.arange(12).reshape(3, 4),
    index=["a", "b", "c"],
    columns=["X", "Y", "Z", "W"],
)
# Dict of equal-length lists: each key becomes a column.
d1 = dict(name=["小红", "小刚"], age=[22, 25], tel=[10086, 10010])
pd.DataFrame(data=d1)
# List of dicts: each dict becomes a row; keys missing from a row become NaN.
d2 = [
    dict(name="xiaoming", age=11, tel=10086),
    dict(name="xiaozhang", age=12),
    dict(name="liming"),
]
m = pd.DataFrame(data=d2)
import pandas as pd
# Reload data/dogName.csv and order the rows by Count_AnimalName, largest first.
df = pd.read_csv("data/dogName.csv").sort_values(
    by="Count_AnimalName",
    ascending=False,
)
# Show the first five rows of the sorted frame.
print(df.head())
# Selecting rows and columns: a slice inside [] operates on rows; a string inside [] operates on columns.
# First 20 rows (by position).
print(df.head(20))
# The whole "Row_Labels" column.
print(df.loc[:, "Row_Labels"])
# The "Row_Labels" column restricted to the first 20 rows.
print(df.head(20).loc[:, "Row_Labels"])
# loc: select data by label.
# iloc: select data by integer position.
# Labeled 3x4 frame for the loc/iloc examples: rows a-c, columns X, Y, Z, W,
# holding the integers 0..11. `n` was previously used without being defined.
n = pd.DataFrame(
    np.arange(12).reshape(3, 4),
    index=list("abc"),
    columns=list("XYZW"),
)
n.loc["a", "Z"]  # scalar at row "a", column "Z" -> 2
n.loc["a", :]  # the whole row "a"
n.loc[["a", "c"], :]  # rows "a" and "c"
n.iloc[1]  # row at position 1 (0-based, i.e. the second row, "b")
n.iloc[:, 2]  # column at position 2 (0-based, i.e. the third column, "Z")
n.iloc[1:, 2:]  # rows from position 1 onward, columns from position 2 onward
# Boolean frame of the same shape as m, True where a value is missing.
pd.isnull(m)
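# Drop rows containing NaN: how="all" drops a row only if every value is NaN, how="any" drops it if at least one value is NaN; inplace=True modifies the DataFrame in place.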
# Drop rows in which every value is NaN; with inplace=False a new frame is
# returned and m itself is left unchanged.
m.dropna(axis=0, how="all", inplace=False)
# Replace every missing value with 0 (returns a new frame).
m.fillna(0)
# Fill each numeric column's gaps with that column's mean. numeric_only=True
# is required because m also holds a string column ("name"): without it,
# DataFrame.mean raises TypeError on pandas >= 2.0.
m.fillna(m.mean(numeric_only=True))
# Fill the NaN entries of a single column with that column's mean.
# Replace missing ages with the mean of the ages that are present.
age_column = m["age"]
m["age"] = age_column.fillna(age_column.mean())
# Where 0 stands for "missing", turn those zeros into NaN.
zero_mask = m == 0
m[zero_mask] = np.nan