希望你能耐心看完:这样你将学会如何通过阅读源码来自学,收获的将不仅仅是会使用一个函数。
python :3.6.5
pandas :0.25.1
下面的代码块摘抄自 D:\Program Files\envs\test\Lib\site-packages\pandas\io\excel\_base.py
# Entry point for reading an Excel source: validates the keyword arguments,
# makes sure ``io`` is an ExcelFile, then delegates the parsing to ``io.parse``.
# No inline docstring is written here; the Appender decorator is handed
# ``_read_excel_doc`` (defined outside this excerpt) instead.
@Appender(_read_excel_doc)
# NOTE(review): presumably remaps the legacy ``skip_footer`` keyword onto
# ``skipfooter`` -- confirm against deprecate_kwarg's implementation.
@deprecate_kwarg("skip_footer", "skipfooter")
def read_excel(io,sheet_name=0,header=0,names=None,index_col=None,usecols=None,squeeze=False,dtype=None,engine=None,
               converters=None,true_values=None,false_values=None,skiprows=None,nrows=None,na_values=None,keep_default_na=True,
               verbose=False,parse_dates=False,date_parser=None,thousands=None,comment=None,skip_footer=0,skipfooter=0,
               convert_float=True,mangle_dupe_cols=True,**kwds):
    # Because of ``**kwds`` these names would otherwise be accepted silently and
    # forwarded to ``io.parse``; reject them with the same TypeError an unknown
    # keyword would produce. (Presumably old spellings of current parameters --
    # verify against the pandas changelog.)
    for arg in ("sheet", "sheetname", "parse_cols"):
        if arg in kwds:
            raise TypeError(
                "read_excel() got an unexpected keyword argument " "`{}`".format(arg)
            )
    # Anything that is not already an ExcelFile is wrapped in one, using the
    # requested engine.
    if not isinstance(io, ExcelFile):
        io = ExcelFile(io, engine=engine)
    # An existing ExcelFile already carries an engine: a truthy ``engine``
    # argument that differs from it is an error (an equal one is tolerated).
    elif engine and engine != io.engine:
        raise ValueError(
            "Engine should not be specified when passing "
            "an ExcelFile - ExcelFile already has the engine set"
        )
    # Forward the parsing options. ``engine`` was consumed above and
    # ``skip_footer`` is not passed on -- only ``skipfooter`` is.
    return io.parse(
        sheet_name=sheet_name,
        header=header,
        names=names,
        index_col=index_col,
        usecols=usecols,
        squeeze=squeeze,
        dtype=dtype,
        converters=converters,
        true_values=true_values,
        false_values=false_values,
        skiprows=skiprows,
        nrows=nrows,
        na_values=na_values,
        keep_default_na=keep_default_na,
        verbose=verbose,
        parse_dates=parse_dates,
        date_parser=date_parser,
        thousands=thousands,
        comment=comment,
        skipfooter=skipfooter,
        convert_float=convert_float,
        mangle_dupe_cols=mangle_dupe_cols,
        **kwds
    )
"""
io : str, ExcelFile, xlrd.Book, path object or file-like object
Any valid string path is acceptable. The string could be a URL. Valid
URL schemes include http, ftp, s3, and file. For file URLs, a host is
expected. A local file could be: ``file://localhost/path/to/table.xlsx``.
If you want to pass in a path object, pandas accepts any ``os.PathLike``.
By file-like object, we refer to objects with a ``read()`` method,
such as a file handler (e.g. via builtin ``open`` function)
or ``StringIO``.
"""
例子:
In [1] : at = pd.read_excel("C:\\Users\\admin\\Desktop\\at.xls")
In [2] : at.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8108 entries, 0 to 8107
Data columns (total 4 columns):
序号 8108 non-null int64
申请编码 8108 non-null int64
姓名 8108 non-null object
证件号 8108 non-null object
dtypes: int64(2), object(2)
memory usage: 253.5+ KB
In [3] : at.head()
Out[3] :
序号 申请编码 姓名 证件号
0 1 821102274825 艾文杰 23052419******2417
1 2 3762102138647 白银桃 42102319******4125
2 3 3335100895505 包彩骏 33012719******3111
3 4 9000101269840 包浩浩 33012719******2014
4 5 9186100202579 包建明 33012719******3318
| 赋值 | 解释 |
|---|---|
| sheet_name = 0 (默认为0) | 第一张工作表作为DataFrame |
| sheet_name = 1 | 第二张工作表作为DataFrame |
| sheet_name = "Sheet1" | 名字为"Sheet1"的工作表作为DataFrame |
| sheet_name = [0,1,"Sheet5"] | 第一张工作表、第二张工作表和名字为"Sheet5"的工作表,以字典形式返回(值为各工作表对应的DataFrame) |
| sheet_name = None | 所有的工作表,以字典形式返回(键为工作表名,值为对应的DataFrame) |
"""
sheet_name : str, int, list, or None, default 0
Strings are used for sheet names. Integers are used in zero-indexed
sheet positions. Lists of strings/integers are used to request
multiple sheets. Specify None to get all sheets.
Available cases:
* Defaults to ``0``: 1st sheet as a `DataFrame`
* ``1``: 2nd sheet as a `DataFrame`
* ``"Sheet1"``: Load sheet with name "Sheet1"
* ``[0, 1, "Sheet5"]``: Load first, second and sheet named "Sheet5"
as a dict of `DataFrame`
* None: All sheets.
"""
例子:
In [56]: at = pd.read_excel('C:\\Users\\admin\\Desktop\\at.xls',sheet_name=0)
In [57]: at.head()
Out[57]:
序号 申请编码 姓名 证件号
0 1 821102274825 艾文杰 23052419******2417
1 2 3762102138647 白银桃 42102319******4125
2 3 3335100895505 包彩骏 33012719******3111
3 4 9000101269840 包浩浩 33012719******2014
4 5 9186100202579 包建明 33012719******3318
In [58]: type(at)
Out[58]: pandas.core.frame.DataFrame
In [62]: ats = pd.read_excel('C:\\Users\\admin\\Desktop\\at.xls',sheet_name=None)
In [63]: ats
Out[63]:
OrderedDict([('Sheet1', 序号 申请编码 姓名 证件号
0 1 821102274825 艾文杰 23052419******2417
1 2 3762102138647 白银桃 42102319******4125
2 3 3335100895505 包彩骏 33012719******3111
3 4 9000101269840 包浩浩 33012719******2014
4 5 9186100202579 包建明 33012719******3318
... ... ... ... ...
8103 8104 5066101495593 荘新宝 33012719******3919
8104 8105 1149101603138 邹晶晶 33012719******032X
8105 8106 817102313810 左常春 33012719******1234
8106 8107 5483102451796 左常青 33012719******122X
8107 8108 2266100036155 左建琴 33012719******1229
[8108 rows x 4 columns]),
('Sheet2', 29 71102051772 鲍建飞 33012719******4218
0 30 6372100223138 鲍建飞 33012719******5715
1 31 8135102783848 鲍建恒 33012719******2018
2 32 1563100577687 鲍建民 33012719******0213
3 33 8256101447949 鲍金文 33012719******3213
4 34 8377101988262 鲍军 33012719******3217
5 35 6119102222613 鲍军华 33012719******5412)])
In [64]: type(ats)
Out[64]: collections.OrderedDict
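sheet_name=None 时返回的是 collections.OrderedDict(有序字典):键是工作表名,值是对应的DataFrame,例如可以用 ats['Sheet1'] 取出第一张表。(不熟悉 OrderedDict 的读者可查阅 Python 官方文档的 collections 模块)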
"""
header : int, list of int, default 0
Row (0-indexed) to use for the column labels of the parsed
DataFrame. If a list of integers is passed those row positions will
be combined into a ``MultiIndex``. Use None if there is no header.
"""
例子:
不加header参数则默认为header=0,即把第1行(行号从0开始计数)用作列名;
header=1:把第2行用作列名,它上面的行会被丢弃;
header=2:把第3行用作列名,依此类推。
In [76]: pd.read_excel('C:\\Users\\admin\\Desktop\\at.xls',sheet_name=1)
Out[76]:
29 71102051772 鲍建飞 33012719******4218
0 30 6372100223138 鲍建飞 33012719******5715
1 31 8135102783848 鲍建恒 33012719******2018
2 32 1563100577687 鲍建民 33012719******0213
3 33 8256101447949 鲍金文 33012719******3213
4 34 8377101988262 鲍军 33012719******3217
5 35 6119102222613 鲍军华 33012719******5412
In [78]: pd.read_excel('C:\\Users\\admin\\Desktop\\at.xls',sheet_name=1,header=0)
Out[78]:
29 71102051772 鲍建飞 33012719******4218
0 30 6372100223138 鲍建飞 33012719******5715
1 31 8135102783848 鲍建恒 33012719******2018
2 32 1563100577687 鲍建民 33012719******0213
3 33 8256101447949 鲍金文 33012719******3213
4 34 8377101988262 鲍军 33012719******3217
5 35 6119102222613 鲍军华 33012719******5412
In [79]: pd.read_excel('C:\\Users\\admin\\Desktop\\at.xls',sheet_name=1,header=1)
Out[79]:
30 6372100223138 鲍建飞 33012719******5715
0 31 8135102783848 鲍建恒 33012719******2018
1 32 1563100577687 鲍建民 33012719******0213
2 33 8256101447949 鲍金文 33012719******3213
3 34 8377101988262 鲍军 33012719******3217
4 35 6119102222613 鲍军华 33012719******5412
header = None:表示数据中没有标题行,所有行都作为数据,列名自动编号为0,1,2,…
In [77]: pd.read_excel('C:\\Users\\admin\\Desktop\\at.xls',sheet_name=1,header=None)
Out[77]:
0 1 2 3
0 29 71102051772 鲍建飞 33012719******4218
1 30 6372100223138 鲍建飞 33012719******5715
2 31 8135102783848 鲍建恒 33012719******2018
3 32 1563100577687 鲍建民 33012719******0213
4 33 8256101447949 鲍金文 33012719******3213
5 34 8377101988262 鲍军 33012719******3217
6 35 6119102222613 鲍军华 33012719******5412
"""
names : array-like, default None
List of column names to use. If file contains no header row,
then you should explicitly pass header=None.
"""
#数据中没有标题行,则header = None,names默认为None
In [31]: pd.read_excel('C:\\Users\\Administrator\\Desktop\\at.xls',sheet_name= 1 ,header = None)
Out[31]:
0 1 2 3
0 1 71102051772 鲍建飞 33012719******4218
1 2 6372100223138 鲍建飞 33012719******5715
2 3 8135102783848 鲍建恒 33012719******2018
3 4 1563100577687 鲍建民 33012719******0213
4 5 8256101447949 鲍金文 33012719******3213
5 6 8377101988262 鲍军 33012719******3217
6 7 6119102222613 鲍军华 33012719******5412
In [43]: pd.read_excel('C:\\Users\\Administrator\\Desktop\\at.xls',sheet_name= 0,names = None)
Out[43]:
序号 申请编码 姓名 证件号
0 1 821102274825 艾文杰 23052419******2417
1 2 3762102138647 白银桃 42102319******4125
2 3 3335100895505 包彩骏 33012719******3111
3 4 9000101269840 包浩浩 33012719******2014
4 5 9186100202579 包建明 33012719******3318
... ... ... ... ...
8103 8104 5066101495593 荘新宝 33012719******3919
8104 8105 1149101603138 邹晶晶 33012719******032X
8105 8106 817102313810 左常春 33012719******1234
8106 8107 5483102451796 左常青 33012719******122X
8107 8108 2266100036155 左建琴 33012719******1229
[8108 rows x 4 columns]
# 若要将header修改为a,b,c,d,则赋值names = ['a','b','c','d']
In [45]: pd.read_excel('C:\\Users\\Administrator\\Desktop\\at.xls',sheet_name= 0,names=['a','b','c','d'])
Out[45]:
a b c d
0 1 821102274825 艾文杰 23052419******2417
1 2 3762102138647 白银桃 42102319******4125
2 3 3335100895505 包彩骏 33012719******3111
3 4 9000101269840 包浩浩 33012719******2014
4 5 9186100202579 包建明 33012719******3318
... ... ... ... ...
8103 8104 5066101495593 荘新宝 33012719******3919
8104 8105 1149101603138 邹晶晶 33012719******032X
8105 8106 817102313810 左常春 33012719******1234
8106 8107 5483102451796 左常青 33012719******122X
8107 8108 2266100036155 左建琴 33012719******1229
[8108 rows x 4 columns]
"""
index_col : int, list of int, default None
Column (0-indexed) to use as the row labels of the DataFrame.
Pass None if there is no such column. If a list is passed,
those columns will be combined into a ``MultiIndex``. If a
subset of data is selected with ``usecols``, index_col
is based on the subset.
"""
#默认index_col=None:不使用任何一列作为行索引,而是自动生成从0开始的整数行索引。
In [57]: pd.read_excel('C:\\Users\\Administrator\\Desktop\\at.xls',sheet_name= 1,header = None,index_col=None)
Out[57]:
0 1 2 3
0 1 71102051772 鲍建飞 33012719******4218
1 2 6372100223138 鲍建飞 33012719******5715
2 3 8135102783848 鲍建恒 33012719******2018
3 4 1563100577687 鲍建民 33012719******0213
4 5 8256101447949 鲍金文 33012719******3213
5 6 8377101988262 鲍军 33012719******3217
6 7 6119102222613 鲍军华 33012719******5412
#index_col=0:第“0”列被用作行索引,不再出现在数据列中(数据列由4列变为3列),其余各列对应的数据不变
In [58]: pd.read_excel('C:\\Users\\Administrator\\Desktop\\at.xls',sheet_name= 1,header = None,index_col=0)
Out[58]:
1 2 3
0
1 71102051772 鲍建飞 33012719******4218
2 6372100223138 鲍建飞 33012719******5715
3 8135102783848 鲍建恒 33012719******2018
4 1563100577687 鲍建民 33012719******0213
5 8256101447949 鲍金文 33012719******3213
6 8377101988262 鲍军 33012719******3217
7 6119102222613 鲍军华 33012719******5412
#index_col=1:第“1”列被用作行索引,不再出现在数据列中(数据列由4列变为3列),其余各列对应的数据不变
In [59]: pd.read_excel('C:\\Users\\Administrator\\Desktop\\at.xls',sheet_name= 1,header = None,index_col=1)
Out[59]:
0 2 3
1
71102051772 1 鲍建飞 33012719******4218
6372100223138 2 鲍建飞 33012719******5715
8135102783848 3 鲍建恒 33012719******2018
1563100577687 4 鲍建民 33012719******0213
8256101447949 5 鲍金文 33012719******3213
8377101988262 6 鲍军 33012719******3217
6119102222613 7 鲍军华 33012719******5412
"""
usecols : int, str, list-like, or callable default None
Return a subset of the columns.
* If None, then parse all columns.
* If int, then indicates last column to be parsed.
.. deprecated:: 0.24.0
Pass in a list of int instead from 0 to `usecols` inclusive.
* If str, then indicates comma separated list of Excel column letters
and column ranges (e.g. "A:E" or "A,C,E:F"). Ranges are inclusive of
both sides.
* If list of int, then indicates list of column numbers to be parsed.
* If list of string, then indicates list of column names to be parsed.
.. versionadded:: 0.24.0
* If callable, then evaluate each column name against it and parse the
column if the callable returns ``True``.
.. versionadded:: 0.24.0
"""
#默认为None,解析所有列
In [65]: pd.read_excel('C:\\Users\\Administrator\\Desktop\\at.xls',sheet_name= 1,header = None)
Out[65]:
0 1 2 3
0 1 71102051772 鲍建飞 33012719******4218
1 2 6372100223138 鲍建飞 33012719******5715
2 3 8135102783848 鲍建恒 33012719******2018
3 4 1563100577687 鲍建民 33012719******0213
4 5 8256101447949 鲍金文 33012719******3213
5 6 8377101988262 鲍军 33012719******3217
6 7 6119102222613 鲍军华 33012719******5412
#usecols = [1,2],解析“1”,“2”所在的列
In [66]: pd.read_excel('C:\\Users\\Administrator\\Desktop\\at.xls',sheet_name= 1,header = None,usecols=[1,2])
Out[66]:
1 2
0 71102051772 鲍建飞
1 6372100223138 鲍建飞
2 8135102783848 鲍建恒
3 1563100577687 鲍建民
4 8256101447949 鲍金文
5 8377101988262 鲍军
6 6119102222613 鲍军华
"""
squeeze : bool, default False
If the parsed data only contains one column then return a Series.
dtype : Type name or dict of column -> type, default None
Data type for data or columns. E.g. {'a': np.float64, 'b': np.int32}
Use `object` to preserve data as stored in Excel and not interpret dtype.
If converters are specified, they will be applied INSTEAD
of dtype conversion.
.. versionadded:: 0.20.0
"""
"""
skiprows : list-like
Rows to skip at the beginning (0-indexed).
"""
In [75]: pd.read_excel('C:\\Users\\Administrator\\Desktop\\at.xls',sheet_name= 1,header = None,skiprows=0)
Out[75]:
0 1 2 3
0 1 71102051772 鲍建飞 33012719******4218
1 2 6372100223138 鲍建飞 33012719******5715
2 3 8135102783848 鲍建恒 33012719******2018
3 4 1563100577687 鲍建民 33012719******0213
4 5 8256101447949 鲍金文 33012719******3213
5 6 8377101988262 鲍军 33012719******3217
6 7 6119102222613 鲍军华 33012719******5412
In [76]: pd.read_excel('C:\\Users\\Administrator\\Desktop\\at.xls',sheet_name= 1,header = None,skiprows=1)
Out[76]:
0 1 2 3
0 2 6372100223138 鲍建飞 33012719******5715
1 3 8135102783848 鲍建恒 33012719******2018
2 4 1563100577687 鲍建民 33012719******0213
3 5 8256101447949 鲍金文 33012719******3213
4 6 8377101988262 鲍军 33012719******3217
5 7 6119102222613 鲍军华 33012719******5412
"""
nrows : int, default None
Number of rows to parse.
"""
In [78]: pd.read_excel('C:\\Users\\Administrator\\Desktop\\at.xls',sheet_name= 1,header = None,nrows=3)
Out[78]:
0 1 2 3
0 1 71102051772 鲍建飞 33012719******4218
1 2 6372100223138 鲍建飞 33012719******5715
2 3 8135102783848 鲍建恒 33012719******2018
其他的参数暂时还没有使用到,以后再做补充。