import pandas as pd

# Load the comma-separated example file into a DataFrame with read_csv.
df = pd.read_csv(
    r'/Users/faye/Desktop/examples/ex1.csv',
)
df  # bare last expression so the notebook renders the frame
>>>
a b c d message
0 1 2 3 4 hello
1 5 6 7 8 world
2 9 10 11 12 foo
In [4]:
# read_table can load the same file when the delimiter is given explicitly.
# NOTE: the recorded run of this cell emitted a FutureWarning recommending
# read_csv instead of read_table for the pandas version it was executed with.
pd.read_table(
    r'/Users/faye/Desktop/examples/ex1.csv',
    sep=',',
)
/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:2: FutureWarning: read_table is deprecated, use read_csv instead.
Out[4]:
a b c d message
0 1 2 3 4 hello
1 5 6 7 8 world
2 9 10 11 12 foo
In [7]:
# The source file has no header row; header=None makes pandas assign
# default integer column names instead of consuming the first data line.
pd.read_csv(
    r'/Users/faye/Desktop/examples/ex2.csv',
    header=None,
)
Out[7]:
0 1 2 3 4
0 1 2 3 4 hello
1 5 6 7 8 world
2 9 10 11 12 foo
In [9]:
# Supply custom column names for the header-less file via `names`.
pd.read_csv(
    r'/Users/faye/Desktop/examples/ex2.csv',
    names=['a', 'b', 'c', 'd', 'message'],
)
Out[9]:
a b c d message
0 1 2 3 4 hello
1 5 6 7 8 world
2 9 10 11 12 foo
In [10]:
# index_col selects which column becomes the row index of the DataFrame;
# here the custom-named 'message' column is used.
pd.read_csv(
    r'/Users/faye/Desktop/examples/ex2.csv',
    names=['a', 'b', 'c', 'd', 'message'],
    index_col='message',
)
Out[10]:
a b c d
message
hello 1 2 3 4
world 5 6 7 8
foo 9 10 11 12
In [12]:
# Passing a list of columns to index_col turns them into a hierarchical
# (multi-level) index.
parsed = pd.read_csv(
    r'/Users/faye/Desktop/examples/csv_mindex.csv',
    index_col=['key1', 'key2'],
)
parsed  # display the frame with its two-level index
Out[12]:
           value1  value2
key1 key2
one  a          1       2
     b          3       4
     c          5       6
     d          7       8
two  a          9      10
     b         11      12
     c         13      14
     d         15      16
In [13]:
# Use the regular expression \s+ (one or more whitespace characters) as the
# field separator for read_table.
# The pattern is written as a raw string: in a regular string literal "\s" is
# an invalid escape sequence, which Python reports with a warning.
result = pd.read_table(
    r'/Users/faye/Desktop/examples/ex3.txt',
    sep=r'\s+',
)
result  # display the parsed frame
/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:2: FutureWarning: read_table is deprecated, use read_csv instead.
Out[13]:
A B C
aaa -0.264438 -1.026059 -0.619500
bbb 0.927272 0.302904 -0.032399
ccc -0.264273 -0.386314 -0.217601
ddd -0.871858 -0.348382 1.100491
In [14]:
# skiprows lists the 0-based line numbers to ignore while reading:
# here the first, third and fourth lines of the file are skipped.
pd.read_csv(
    r'/Users/faye/Desktop/examples/ex4.csv',
    skiprows=[0, 2, 3],
)
Out[14]:
a b c d message
0 1 2 3 4 hello
1 5 6 7 8 world
2 9 10 11 12 foo
read_csv/read_table函数的参数: