本文是基于Windows系统环境,学习和测试pandas模块:
Windows 10
PyCharm 2018.3.5 for Windows (exe)
python 3.6.8 Windows x86 executable installer
import pandas as pd
# Read a comma-separated text file that has no header row; pandas then
# numbers the columns 0..N-1. The module is imported under the alias `pd`,
# so the bare name `pandas` is not defined here.
data = pd.read_csv('test.txt', sep=',', header=None)
print(data)
import pandas as pd
# Read one row of the file by its zero-based position.
data = pd.read_csv('test.txt')
index = 3
# `.ix` no longer exists in current pandas; `.iloc` selects strictly by
# position. Position 3 is the fourth data row (counting starts at 0).
print(data.iloc[index])
import pandas as pd
# Read a single column by its header name.
data = pd.read_csv('test.txt')
print(data['ID'])  # column labels are case-sensitive: 'ID' is not 'id'
import pandas as pd
# Preview the first five rows of the file.
data = pd.read_csv('user.csv')
# Outside an interactive shell a bare expression displays nothing, so the
# preview is printed explicitly, like the other examples in this file.
print(data.head(5))
import pandas as pd
import pymysql
# Load every row of one MySQL table into a DataFrame, then export it as
# CSV and JSON.
table = 'user'  # name of the table to read

# The port is passed as an integer rather than a string.
# NOTE(review): no database is passed to connect(); the unqualified table
# name presumably needs one to be selected -- confirm and add `database=`.
conn = pymysql.connect(host="localhost", user="root", password="root", port=3306, charset="utf8")
try:
    sql_select = "SELECT * FROM " + table
    df = pd.read_sql(sql_select, conn)
finally:
    # Release the connection even when the query fails.
    conn.close()
print(df)
# Keep the column names as a header row; do not write the row index.
df.to_csv("data.csv", sep=",", header=True, index=False)
df.to_json("data.json")
import pandas as pd
import pymysql
from sqlalchemy import create_engine
# Load the exported CSV and append its rows to the MySQL table `user`.
# header=0 treats the file's first line as a header and replaces it with
# the explicit column names given in `names`.
data = pd.read_csv('data.csv', names=['id', 'name', 'password'], header=0)
connect = create_engine('mysql+pymysql://root:123456@localhost:3306/idam?charset=utf8')
# index=False: by default to_sql also writes the DataFrame's row index as an
# extra column, which is not one of the three columns loaded above.
data.to_sql(name='user', con=connect, if_exists='append', index=False)
import pandas as pd
# Drop (or select) the rows / columns of a DataFrame that hold a given value.
data = pd.read_csv('user.csv')
# Keep an unfiltered copy for the column-wise example further down; df2 was
# previously used here without ever being defined.
df2 = data.copy()
print(data)

# Rows: data[data['A'].isin([1])] would select the rows where column A is 1.
data = data[~data['A'].isin([1])]  # ~ negates the mask: keep rows where A is not 1
print(data)

# Columns: walk the first row and collect every column whose value there is 3.
cols = [name for pos, name in enumerate(df2.columns) if df2.iat[0, pos] == 3]
print(cols)
# df2 = df2[cols] would instead select exactly those columns.
df2 = df2.drop(cols, axis=1)  # remove the matching columns
print(df2)
import pandas as pd
import numpy as np
# Build a small frame with missing values to demonstrate removing the rows
# and columns that contain NaN.
df1 = pd.DataFrame(
    [
        [np.nan, 2, np.nan, 0],
        [3, 4, np.nan, 1],
        [np.nan, np.nan, np.nan, 5],
        [np.nan, 3, np.nan, 4],
    ],
    columns=list('ABCD'),
)
print(df1)
df2 = df1.copy()

# Rows: keep only the rows whose column A is present. Testing for NaN
# directly avoids writing a 'null' string into a numeric column, which would
# turn the column into object dtype and also match genuine 'null' strings.
rows_with_a = df1['A'].notna()
print(rows_with_a)
df1 = df1[rows_with_a]
print(df1)

# Columns: drop every column that is missing in the first row.
first_row_missing = df2.iloc[0].isna()
print(first_row_missing)
cols = [name for name, missing in zip(df2.columns, first_row_missing) if missing]
print(cols)
df2 = df2.drop(cols, axis=1)
print(df2)
import pandas as pd
# Load the file and print the summary produced by DataFrame.describe().
data = pd.read_csv('user.csv')
summary = data.describe()
print(summary)
import pandas as pd
# Load the file and print the median of the userAge column.
data = pd.read_csv('user.csv')
age_median = data['userAge'].median()
print(age_median)
import pandas as pd
# Load the file and print the distinct values of the userName column.
data = pd.read_csv('user.csv')
distinct_names = data['userName'].unique()
print(distinct_names)
import pandas as pd
# Load the file and replace missing userAge values with the column median.
data = pd.read_csv('user.csv')
age_median = data['userAge'].median()
data['userAge'] = data['userAge'].fillna(age_median)
import matplotlib as mpl
import pandas as pd
import numpy as np
from pandas import Series
import matplotlib.pyplot as plt
# Scatter-plot four sample values, coloured by a 0/1 label.
ds1 = Series([3.4, 3.5, 3.4, 3.3])
ds2 = Series(np.arange(1, len(ds1) + 1, 1))  # x positions 1..len(ds1)
ds3 = Series([0, 1, 0, 1])
df = pd.DataFrame()
df['index'] = ds2
df['value'] = ds1
df['label'] = ds3


def colormap():
    """Return a linear colormap running from blue (#0000FF) to red (#FF0000)."""
    # The third argument is the number of colour steps in the map.
    return mpl.colors.LinearSegmentedColormap.from_list('cmap', ['#0000FF', '#FF0000'], 256)


# The 'label' column selects each point's colour through the colormap.
df.plot.scatter(x='index', y='value', c='label', colormap=colormap())
plt.show()
使用pandas读取文件
Python 使用pandas读取文件以及基本处理
pandas.DataFrame删除/选取含有特定数值的行或列