pandas_learning

1. 普通操作

import random

import numpy as np
import pandas
import pandas as pd
import seaborn

# Basic Series / DataFrame operations: construction, frequency counts and
# boolean membership masks.

# Example of a labelled 3x4 frame filled with standard-normal samples:
# u = pd.DataFrame(np.random.randn(3, 4), columns=list('abcd'), index=list('123'))
# print(u)

t = pd.Series([-1, -1, -1, 2, 3, 1, 4, -4, 4, 4])
# Wrap the Series as the single column 'a' of a DataFrame.
t_u = pd.DataFrame({'a': t}, columns=['a'])
# print(t_u)

obj = pd.Series([-1, -1, -1, 2, 3, 1, 4, -4, 4])
# print(obj.unique())
# obj is already a Series, so value_counts() is called on it directly;
# the result is a Series indexed by the distinct values.
jlj = obj.value_counts()
# print(jlj)
mask = obj.isin([-1, 4])
# print(mask)  # mask is a boolean Series, True where the value is -1 or 4

# print(obj[mask])  # boolean indexing keeps only the rows where mask is True

# Ten random integers drawn from [0, 10); the loop index itself is unused.
tt = [np.random.randint(10) for _ in range(10)]
# print(tt)

data = pd.DataFrame({'q1': [4, 0, 5, 2, 5, 1, 0, 1, 9, 8],
                     'q2': [4, 2, 7, 4, 6, 1, 3, 2, 3, 2],
                     'q3': [2, 5, 5, 3, 5, 2, 6, 2, 2, 2]
                     })

# Count, per column, how often each value occurs. The top-level
# pd.value_counts function is deprecated, so the Series method is applied to
# every column instead. Values missing from a column come back as NaN and are
# replaced by 0.
result = data.apply(pd.Series.value_counts).fillna(0)
# print(data, '\n', result)


# Draw 2000 weighted samples from 1..6 (4, 5 and 6 are four times as likely
# as 1, 2 and 3) and count the occurrences of the value 1 in two ways.
data_already = list(range(1, 7))
weight = [1] * 3 + [4] * 3
list_num = random.choices(population=data_already, weights=weight, k=2000)

# Way 1: compare element-wise against 1 and sum the resulting booleans.
s = pd.Series(list_num)
count_1 = s.eq(1).sum()
print(count_1)

# Way 2: build the full frequency table and look up the label 1.
data_Series = pd.Series(list_num)
dup_data_1 = data_Series.value_counts()   # dup_data_1 is itself a Series
# print(dup_data_1)
print(dup_data_1[1])

2. match(map)操作


# Map every food item to the animal it comes from and store the result in a
# new 'animal' column.

# Lookup table keyed by the lower-cased food name.
meat_to_animal = {
    "bacon": "pig",
    "pulled pork": "pig",
    "pastrami": "cow",
    "corned beef": "cow",
    "honey ham": "pig",
    "nova lox": "salmon"
}

data_1 = pd.DataFrame({
    "food": [
        "bacon", "pulled pork", "Bacon",
        "Pastrami", "Corned beef", "bacon",
        "pastrami", "honey ham", "nova lox",
    ],
    "ounces": [4, 3, 12, 6, 7.5, 8, 3, 5, 6],
})

# The food names use mixed capitalisation, so normalise them before the lookup.
lowercased = data_1['food'].str.lower()
# print(lowercased)
# print(data_1)

# Approach 1: Series.map with a dict translates each lower-cased name.
data_1['animal'] = lowercased.map(meat_to_animal)
# print(data_1)


# Approach 2: Series.map with a function that lower-cases and looks up the
# name in one step; it fills the same column with the same values.
def _animal_for(food_name):
    return meat_to_animal[food_name.lower()]


data_1['animal'] = data_1['food'].map(_animal_for)
# print(data_1)

# Upper-case the row and column labels of a small frame with Index.map.
data_3 = pd.DataFrame(
    np.arange(12).reshape(3, 4),
    index=["Ohio", "Colorado", "New York"],
    columns=["one", "two", "three", "four"],
)


# Variant that keeps only the first four characters before upper-casing:
# transform = lambda x: x[:4].upper()
def transform(x):
    """Return the label x converted to upper case."""
    return x.upper()


# Index.map calls transform once per label and returns a new Index, which is
# assigned back to replace the axis labels.
data_3.index = data_3.index.map(transform)
data_3.columns = data_3.columns.map(transform)

# print(data_3)

# Alternative via rename, which returns a relabelled frame:
# data_4 = data_3.rename(index=str.title, columns=str.lower)
# data_3
# data_4
# 

将 food 和 animal 对应起来,并作为新的一列加入到 DataFrame 中。

3. 替换 DataFrame 里的元素

# Replace outliers in a frame of random samples: every value whose magnitude
# exceeds 3 is clipped to +3 or -3, keeping its sign.
data = pd.DataFrame(np.random.randn(1000, 4))

col = data[1]
# print(col[np.abs(col) > 3])

# Element-wise operations apply to the whole DataFrame at once. any() over
# the columns axis flags each row that holds at least one value beyond 3.
# This selection is taken BEFORE the clipping below.
row_bigger_than_3 = data[data.abs().gt(3).any(axis="columns")]

# A boolean frame used as the key restricts the assignment to the elements
# where it is True; np.sign(data) * 3 supplies +3 or -3 for those elements.
data[data.abs() > 3] = np.sign(data) * 3
print(row_bigger_than_3)
u_index = row_bigger_than_3.index
# Plain [] on a DataFrame selects columns; loc[rows, columns] selects by
# label, and loc with a single argument selects rows. Here it shows the
# previously flagged rows after clipping.
print(data.loc[u_index])
print(np.sign(data))

你可能感兴趣的:(python)