$ python
Python 2.7.16 (default, Jan 27 2020, 04:46:15)
[GCC 4.2.1 Compatible Apple LLVM 10.0.1 (clang-1001.0.37.14)] on darwin
Type "help", "copyright", "credits" or "license" for more information.
>>> import pandas as pd # 0.24.2
>>>
>>> datas = [{
"one":5,"two":10,"three":15}] * 100000
>>> print('length:', len(datas))
('length:', 100000)
>>> df1 = pd.DataFrame(datas)
>>>
>>> # engine : str, optional
>>> # Write engine to use, ‘openpyxl’ or ‘xlsxwriter’. You can also set this via the options io.excel.xlsx.writer, io.excel.xls.writer, and io.excel.xlsm.writer.
>>> # 根据文件扩展名自动选择默认引擎
>>> # 保存为.xls文件报错,数据不能超过65536行
>>> df1.to_excel('test10.xls', index=False)
Traceback (most recent call last):
File "" , line 1, in <module>
File "/Library/Python/2.7/site-packages/pandas/core/generic.py", line 2118, in to_excel
engine=engine)
File "/Library/Python/2.7/site-packages/pandas/io/formats/excel.py", line 662, in write
freeze_panes=freeze_panes)
File "/Library/Python/2.7/site-packages/pandas/io/excel.py", line 1720, in write_cells
val, style)
File "/Library/Python/2.7/site-packages/xlwt/Worksheet.py", line 1088, in write
self.row(r).write(c, label, style)
File "/Library/Python/2.7/site-packages/xlwt/Worksheet.py", line 1142, in row
self.__rows[indx] = self.Row(indx, self)
File "/Library/Python/2.7/site-packages/xlwt/Row.py", line 37, in __init__
raise ValueError("row index was %r, not allowed by .xls format" % rowx)
ValueError: row index was 65536, not allowed by .xls format
>>>
>>> # 保存为.xlsx文件
>>> df1.to_excel('test10.xlsx', index=False)
>>>
>>>
>>> # 读取.xlsx文件,默认engine为xlrd
>>> # engine : string, default None
>>> # If io is not a buffer or path, this must be set to identify io.
>>> # Acceptable values are None or ``xlrd``.(可接受的值是None或' ' xlrd ' '。)
>>> df2 = pd.read_excel('test10.xlsx')
>>> df2.shape
(100000, 3)
>>>
>>> # 无法指定engine为openpyxl
>>> df2 = pd.read_excel('test10.xlsx', engine='openpyxl')
Traceback (most recent call last):
File "" , line 1, in <module>
File "/Library/Python/2.7/site-packages/pandas/util/_decorators.py", line 188, in wrapper
return func(*args, **kwargs)
File "/Library/Python/2.7/site-packages/pandas/util/_decorators.py", line 188, in wrapper
return func(*args, **kwargs)
File "/Library/Python/2.7/site-packages/pandas/io/excel.py", line 350, in read_excel
io = ExcelFile(io, engine=engine)
File "/Library/Python/2.7/site-packages/pandas/io/excel.py", line 646, in __init__
raise ValueError("Unknown engine: {engine}".format(engine=engine))
ValueError: Unknown engine: openpyxl
>>>
>>>
读取xlsx文件需指定engine。
$ python
Python 3.7.8 (v3.7.8:4b47a5b6ba, Jun 27 2020, 04:47:50)
[Clang 6.0 (clang-600.0.57)] on darwin
Type "help", "copyright", "credits" or "license" for more information.
>>>
>>> import pandas as pd # 1.1.5
>>>
>>>
# 如果io不是缓冲区或路径,则必须将其设置为标识io。
# 支持引擎:xlrd,openpyxl,odf,pyxlsb,默认xlrd。
# 引擎的兼容性:
# xlrd支持大多数旧/新的Excel文件格式。
# openpyxl支持更新的Excel文件格式。
# odf支持OpenDocument文件格式(.odf, .ods, .odt)。
# pyxlsb支持二进制Excel文件。
>>> df2 = pd.read_excel('test10.xlsx')
Traceback (most recent call last):
File "" , line 1, in <module>
File "/Users/macminimd389/.virtualenvs/django3/lib/python3.7/site-packages/pandas/util/_decorators.py", line 296, in wrapper
return func(*args, **kwargs)
File "/Users/macminimd389/.virtualenvs/django3/lib/python3.7/site-packages/pandas/io/excel/_base.py", line 304, in read_excel
io = ExcelFile(io, engine=engine)
File "/Users/macminimd389/.virtualenvs/django3/lib/python3.7/site-packages/pandas/io/excel/_base.py", line 867, in __init__
self._reader = self._engines[engine](self._io)
File "/Users/macminimd389/.virtualenvs/django3/lib/python3.7/site-packages/pandas/io/excel/_xlrd.py", line 22, in __init__
super().__init__(filepath_or_buffer)
File "/Users/macminimd389/.virtualenvs/django3/lib/python3.7/site-packages/pandas/io/excel/_base.py", line 353, in __init__
self.book = self.load_workbook(filepath_or_buffer)
File "/Users/macminimd389/.virtualenvs/django3/lib/python3.7/site-packages/pandas/io/excel/_xlrd.py", line 37, in load_workbook
return open_workbook(filepath_or_buffer)
File "/Users/macminimd389/.virtualenvs/django3/lib/python3.7/site-packages/xlrd/__init__.py", line 170, in open_workbook
raise XLRDError(FILE_FORMAT_DESCRIPTIONS[file_format]+'; not supported')
xlrd.biffh.XLRDError: Excel xlsx file; not supported
>>>
>>> df2 = pd.read_excel('test10.xlsx', engine='openpyxl')
>>> df2.shape
(100000, 3)
>>>
>>>