# Notes written while following the 蚂蚁学python course on Bilibili (B站).
# 1. Build a Series from a list.
import pandas as pd
import numpy as np
course = ["Chinese", "Math", "English", "Computer"]
data1 = pd.Series(course)

# 2. Build a Series from a dict.
grades = {"Chinese": 80, "Math": 90, "English": 85, "Computer": 100}
data2 = pd.Series(grades)

# 3. Convert a Series into a list.
Numbers = data2.tolist()

# 4. Convert a Series into a DataFrame with a single 'grade' column.
df = pd.DataFrame(data=data2, columns=['grade'])
print(df)
# 5. Build a Series with the help of NumPy.
s = pd.Series(
    data=np.arange(10, 100, 10),  # values: 10..90 in steps of 10
    index=np.arange(101, 110),    # index labels: 101..109 in steps of 1
    dtype='float64',              # element type: float64
)
print(s)
# 6. Convert a Series of numeric strings into numbers.
s5 = pd.Series(["001", "002", "003", "004"], index=["a", "b", "c", "d"])
# Parse every element with int(); s5.astype(int) is another way to cast.
s5 = s5.map(int)
print(s5)
# 7. Add elements to a Series.
# Series.append() was removed in pandas 2.0; pd.concat() joins the two
# Series end to end instead.
data7 = pd.concat([data2, pd.Series({"physics": 88, "chemic": 95})])
print(data7)

# 8. Turn a Series into a DataFrame with reset_index(), then name the two
# resulting columns (former index labels, values).
df = data2.reset_index()
df.columns = ['course', 'grade']
print(df)
# 9. Build a DataFrame from a dict of columns.
df9 = pd.DataFrame({
    "name": ["xiaozhang", "xiaowang", "xiaoli", "xiaozhao"],
    "gender": ["male", "female", "male", "female"],
    "age": ["18", "19", "20", "18"],
})
print(df9)

# 10. Make the "name" column the index of the DataFrame.
df9 = df9.set_index("name")
print(df9)
# 11. Generate every date of one month (2021-10-01 through 2021-10-31).
data_range = pd.date_range(
    start='2021-10-01',
    end='2021-10-31',
)

# Extracting a numeric feature from a string column, for example:
#   df['mileage'].map(lambda x: float(x.split(" ")[0]))
import re
test = "12.7 @ 2,700(kgm@ rpm)"  # sample torque string
def parse_rpm(torque: str) -> float:
    """Return the largest run of digits in a torque string, as a float.

    Commas are stripped first, so "2,700" is read as 2700. Every run of
    digits is then collected and the maximum is returned; for
    "12.7 @ 2,700(kgm@ rpm)" the runs are 12, 7 and 2700, giving 2700.0.

    Args:
        torque: Text containing one or more numbers.

    Returns:
        The largest digit run converted to float.

    Raises:
        ValueError: If ``torque`` contains no digits.
    """
    without_commas = torque.replace(",", "")
    # Raw string: "\d" in a plain literal is an invalid escape sequence.
    return max(float(run) for run in re.findall(r"\d+", without_commas))
# Usage on a DataFrame column: df["torque"].map(parse_rpm)
# 12. Generate every Monday of a year.
# Variant A: bounded by an explicit end date.
date_range = pd.date_range(
    start='2021-01-01', end='2021-12-31', freq='W-MON'
)
print(date_range)
# Variant B: bounded by a number of weekly periods from the same start.
date_range2 = pd.date_range(
    start='2021-01-01', periods=52, freq='W-MON'
)
print(date_range2)
# 13. Generate every hour of one day (24 hourly timestamps).
# An explicit Hour offset is used instead of the 'H' string alias, which
# pandas deprecated in 2.2 in favor of 'h'; the offset object works with
# both older and newer pandas versions.
date_range3 = pd.date_range(start='2021-10-01', periods=24, freq=pd.offsets.Hour())
print(date_range3)
# 14. Build a DataFrame from dates.
date_range4 = pd.date_range(start='2021-10-01', periods=31)
df = pd.DataFrame(date_range4, columns=['day'])
# Day number within the year for each date.
df['day_of_year'] = df['day'].dt.dayofyear
print(df)
# 15. Build a DataFrame from dates and random numbers.
date_range5 = pd.date_range(start='2021-01-01', periods=1000)
data = {
    # size= draws one value per date; without it each call returns a
    # single scalar.
    'norm': np.random.normal(loc=0, scale=1, size=len(date_range5)),
    'uniform': np.random.uniform(low=0, high=1, size=len(date_range5)),
    # The NumPy function is np.random.binomial; the column label keeps its
    # original spelling so existing lookups by that label still work.
    'binomal': np.random.binomial(n=1, p=0.2, size=len(date_range5)),
}
# Index by the 1000 daily dates created above.
df = pd.DataFrame(data=data, index=date_range5)
print(df)

# 16. Print the first and last rows of the DataFrame.
print(df.head(10))
print()
print(df.tail(10))

# 17. DataFrame structure summary and basic statistics.
df.info()  # info() prints its report itself
print()
print(df.describe())  # describe() only returns a table, so print it
# 18. Count how often each value occurs in a data column.
# NOTE(review): the original called an undefined `values_count()`; the pandas
# method is Series.value_counts(). The 'binomal' column is an assumed
# target -- confirm which column was intended.
print(df['binomal'].value_counts())

# 19. Save the first N rows of df to a CSV file.
# TODO(review): the row count and file name are placeholders; the original
# bare `to_csv()` call specified neither.
csv_path = 'df_head.csv'
df.head(100).to_csv(csv_path)

# 20. Load a CSV file into df, using the first column as the index.
df = pd.read_csv(csv_path, index_col=0)

# 21. Load the stock data CSV file.
# TODO(review): placeholder path -- the original `read_csv()` call had no
# arguments. Assumes the file has 'Date' and 'Close' columns (both are used
# by the steps below) and that 'Date' was meant to be the index, since step
# 23 resets the index before step 24 reads df['Date'] -- confirm.
stock_csv_path = 'stock.csv'
data = pd.read_csv(stock_csv_path, index_col='Date')

# 22. Stock data: structure summary and basic statistics.
data.info()
print()
print(data.describe())

# 23. Turn the index back into an ordinary data column.
data.reset_index(inplace=True)

# 24. Add year and month columns to the stock data.
# The original switched from `data` to `df` here; bind `df` to the stock
# frame so the remaining steps operate on the loaded stock data.
df = data
df['Date'] = pd.to_datetime(df['Date'])
df['Year'] = df['Date'].dt.year
df['Month'] = df['Date'].dt.month

# 25. Average closing price per year.
# mean must be called (the original only referenced the method) and the
# result printed to be visible when run as a script.
print(df.groupby('Year')['Close'].mean())