不说了,又是造轮子的一天。在此我要严重批评CSDN或百度一堆浑水摸鱼的,某些人明明代码明显报错也来上传发博客,要么就是标题党,代码没报错但压根就不是实现那个功能的,简直是浪费时间。
废话不多说直接贴代码:
先新建一个data文件夹,文件路径我填的是 ./data/
使用xlrd、xlwt:
import xlrd
import xlwt
import xlutils.copy
class XLSX:
__workbook = xlwt.Workbook(encoding='utf-8')
__sheet = __workbook.add_sheet('Sheet1', cell_overwrite_ok=True)
# 保存
def save(self, name, head_list, data_list):
path = './data/' + name + '.xlsx'
head = head_list
data = data_list
__row = 0
__col = 0
for head in head:
self.__sheet.write(__row, __col, head)
__col = __col + 1
for i in range(0, len(data)):
for j in range(1, len(data[i]) + 1):
self.__sheet.write(j, i, data[i][j - 1])
self.__workbook.save(path)
# 在原来的基础上增加 n 列,包含表头
@staticmethod
def save_append_col(name, data_list, head):
path = './data/' + name + '.xlsx'
file = xlrd.open_workbook(path)
table = file.sheet_by_index(0)
col_len = table.ncols
ws = xlutils.copy.copy(file)
ws_table = ws.get_sheet(0)
ws_table.write(0, col_len, head)
for i in range(1, len(data_list) + 1):
ws_table.write(i, col_len, data_list[i - 1])
ws.save(path)
# 在原来的基础上增加 n 列,包含表头,如果存在就在该列下面继续添加
@staticmethod
def save_append_col_append(name, data_list, head, line_index):
path = './data/' + name + '.xlsx'
file = xlrd.open_workbook(path)
table = file.sheet_by_index(0)
cols = table.row_values(0)
if head in cols:
col_len = cols.index(head)
ws = xlutils.copy.copy(file)
ws_table = ws.get_sheet(0)
for i in range(0, len(data_list)):
ws_table.write(line_index + i - 1, col_len, data_list[i])
ws.save(path)
else:
col_len = table.ncols
ws = xlutils.copy.copy(file)
ws_table = ws.get_sheet(0)
ws_table.write(0, col_len, head)
for i in range(1, len(data_list) + 1):
ws_table.write(i, col_len, data_list[i - 1])
ws.save(path)
# 在原来的基础上增加 n 行,这里是按照每一行的方式追加
@staticmethod
def save_append_row(name, data_list):
path = './data/' + name + '.xlsx'
file = xlrd.open_workbook(path)
table = file.sheet_by_index(0)
row_len = table.nrows
col_len = table.ncols
ws = xlutils.copy.copy(file)
ws_table = ws.get_sheet(0)
for i in range(0, len(data_list)):
for j in range(0, col_len):
ws_table.write(row_len + i, j, data_list[i][j])
ws.save(path)
# 获取文件所有数据
@staticmethod
def get_all_line(file_name):
file = xlrd.open_workbook('./data/' + file_name + '.xlsx')
table = file.sheets()[0]
a = []
for i in range(1, table.nrows):
a.append(table.row_values(i))
return a
# 获取文件所有数据,以行的方式
@staticmethod
def get_all_line_col(file_name):
file = xlrd.open_workbook('./data/' + file_name + '.xlsx')
sheet = file.sheet_by_index(0) # 获取第一个表格
result = []
rows = sheet.nrows
for row_idx in range(rows):
row_values = sheet.row_values(row_idx)
result.append(row_values)
return result
# 获取文件指定一列的所有数据
@staticmethod
def get_all_col(file_name, col_index):
file = xlrd.open_workbook('./data/' + file_name + '.xlsx')
table = file.sheets()[0]
a = []
for i in range(1, table.nrows):
a.append(table.row_values(i)[col_index])
return a
# 获取文件指定一行的所有数据
@staticmethod
def get_one_line(file_name, line_count):
file = xlrd.open_workbook('./data/' + file_name + '.xlsx')
t1 = file.sheets()[0]
return t1.row_values(line_count)
# 获取文件指定一行的指定一列的数据
@staticmethod
def get_one_line_col(file_name, line_count, col_count):
file = xlrd.open_workbook('./data/' + file_name + '.xlsx')
t1 = file.sheets()[0]
return t1.row_values(line_count)[col_count]
# 获取文件总行数
@staticmethod
def get_row_length(file_name):
file = xlrd.open_workbook('./data/' + file_name + '.xlsx')
table = file.sheet_by_index(0)
return table.nrows
# 获取文件指定一列的长度,不包含空值
@staticmethod
def get_one_col_length(file_name, col_index):
file = xlrd.open_workbook('./data/' + file_name + '.xlsx')
table = file.sheets()[0]
col_len = 0
for row_idx in range(table.nrows):
cell_value = table.cell_value(row_idx, col_index)
if cell_value != '':
col_len += 1
return col_len
# 获取总列数
@staticmethod
def get_col_length(file_name):
file = xlrd.open_workbook('./data/' + file_name + '.xlsx')
table = file.sheet_by_index(0)
return table.ncols
# 获取重复列表的下标,假如第1/6/8行相同,5/9行相同,则返回[[1,6,8],[5,9]]
def get_repeat_list(self, file_name, col_index):
return self.find_duplicates(self.get_all_col(file_name, col_index))
# 删除行,[行数1,[行数2]]
@staticmethod
def delete_line(file_name, delete_rows):
path = './data/' + file_name + '.xlsx'
workbook = xlrd.open_workbook(path)
sheet = workbook.sheet_by_index(0)
rows_num = sheet.nrows
rows_data = []
for r in range(rows_num):
if r not in delete_rows:
rows_data.append(sheet.row_values(r))
book = xlwt.Workbook()
sheet1 = book.add_sheet('Sheet1', cell_overwrite_ok=True)
# 将未删除行数据写入sheet
for r, row in enumerate(rows_data):
for c, val in enumerate(row):
sheet1.write(r, c, val)
book.save(path)
# 对所有列所有行进行缺失值处理,使用“缺失补充”来替代
@staticmethod
def deletion(file_name):
path = './data/' + file_name + '.xlsx'
workbook = xlrd.open_workbook(path)
sheet = workbook.sheet_by_index(0)
rows_num = sheet.nrows
cols_num = sheet.ncols
data = []
for r in range(rows_num):
row = []
for c in range(cols_num):
value = sheet.cell_value(r, c)
if value == '': # 如果数据为空,认为是缺失值
value = '缺失补充' # 替换为'缺失'字符串
row.append(value)
data.append(row)
book = xlwt.Workbook()
sheet1 = book.add_sheet('Sheet1', cell_overwrite_ok=True)
for r in range(rows_num):
for c in range(cols_num):
sheet1.write(r, c, data[r][c])
book.save(path)
@staticmethod
def find_duplicates(strs):
result = []
strs_dic = {}
for index, s in enumerate(strs):
if s not in strs_dic:
strs_dic[s] = [index]
else:
strs_dic[s].append(index)
for key in strs_dic:
if len(strs_dic[key]) > 1:
result.append(strs_dic[key])
return result
# 测试
# xx = XLSX()
# xx.save('ddd', ['表头1', '表头2', '表头3'],
# [['列1-第1行', '列1-第2行', '列1-第3行'], ['列2-第1行', '列2-第2行', '列2-第3行'],
# ['列3-第1行', '列3-第2行', '列3-第3行']])
# xx.save_append_row('ddd', [['列1-第4行', '列2-第4行', '列3-第4行'], ['列1-第5行', '列2-第5行', '列3-第5行'],
# ['列1-第6行', '列2-第6行', '列3-第6行']])
# xx.save_append_col('ddd', ['列4-第1行', '列4-第2行', '列4-第3行'], '表头4')
# xx.save_append_col_append('ddd', ['列4-第1行', '列4-第2行', '列4-第3行'], '表头4', 5)
# xx.delete_line('ddd', [5, 6])
# xx.deletion('ddd')
# print(xx.get_all_line('ddd'))
# print(xx.get_one_line('ddd', 2))
# print(xx.get_all_col('ddd', 3))
# print(xx.get_one_line_col('ddd', 1, 0))
# print(xx.get_row_length('ddd'))
# print(xx.get_col_length('ddd'))
# print(ee.get_repeat_list('ddd',1))
# print(xx.get_one_col_length('ddd', 3))
使用pandas:
import os
import numpy as np
import pandas as pd
class EXCEL:
# 保存
@staticmethod
def save(name, head_list, data_list):
path = r'./data/' + name + '.xlsx'
temp = {}
for i in range(len(head_list)):
temp[str(head_list[i])] = data_list[i]
data = pd.DataFrame(temp)
data.to_excel(path, index=False)
# 在原来的基础上增加一个sheet表
@staticmethod
def append_sheet(name, sheet_name, head_list):
write = pd.DataFrame(columns=head_list)
path = r'./data/' + name + '.xlsx'
if not os.path.exists(path):
write = pd.ExcelWriter(path, engine='openpyxl')
with pd.ExcelWriter(path, mode='a', engine='openpyxl') as writer:
write.to_excel(writer, sheet_name=sheet_name, index=False)
# 在原来的基础上增加 n 行,包含表头,这里是按照每一列的方式追加
@staticmethod
def append_row(name, head_list, data_list):
path = r'./data/' + name + '.xlsx'
temp = {}
for i in range(len(head_list)):
temp[str(head_list[i])] = data_list[i]
old_data = pd.read_excel(path)
new_data = pd.DataFrame(temp)
result = pd.concat([old_data, new_data])
result.to_excel(path, index=False)
# 在原来的基础上增加 n 列
@staticmethod
def append_col(name, head_name, data_list):
path = r'./data/' + name + '.xlsx'
df = pd.read_excel(path)
new_col = pd.Series(data_list)
new_col.name = head_name
result = pd.concat([df, new_col], axis=1)
result.to_excel(path, header=True, index=False)
# 获取文件所有数据,以行的方式
@staticmethod
def get_all_line(file_name):
path = r'./data/' + file_name + '.xlsx'
df = pd.read_excel(path)
return df.values.tolist()
# 获取文件所有数据,以列的方式
@staticmethod
def get_all_line_col(file_name):
path = r'./data/' + file_name + '.xlsx'
df = pd.read_excel(path)
cols = df.columns.tolist()
result = []
for col in cols:
result.append(df[col].values.tolist())
return result
# 获取文件指定一列的所有数据
@staticmethod
def get_all_col(file_name, col_index):
path = r'./data/' + file_name + '.xlsx'
df = pd.read_excel(path)
col = df.iloc[:, col_index]
return col.values.tolist()
# 获取文件指定一行的所有数据
@staticmethod
def get_one_line(file_name, line_count):
path = r'./data/' + file_name + '.xlsx'
df = pd.read_excel(path)
row = df.iloc[line_count]
return row.values.tolist()
# 获取文件指定一行的指定一列的数据
@staticmethod
def get_one_line_col(file_name, line_count, col_count):
path = r'./data/' + file_name + '.xlsx'
df = pd.read_excel(path)
row = df.iloc[line_count]
col = row[col_count]
if isinstance(col, np.float64):
col = pd.Series(col)
return col
# 获取文件总行数
@staticmethod
def get_row_length(file_name):
path = r'./data/' + file_name + '.xlsx'
df = pd.read_excel(path)
total_rows = df.shape[0]
return total_rows
# 获取文件总列数
@staticmethod
def get_col_length(file_name):
path = r'./data/' + file_name + '.xlsx'
df = pd.read_excel(path)
total_cols = df.shape[1]
return total_cols
# 删除行,[行数1,[行数2]]
@staticmethod
def delete_line(file_name, delete_rows):
path = r'./data/' + file_name + '.xlsx'
df = pd.read_excel(path)
df.drop(df.index[delete_rows], inplace=True)
df.to_excel(path, index=False)
# 获取重复列表的下标,假如第1/6/8行相同,5/9行相同,则返回[[1,6,8],[5,9]]
def get_repeat_list(self, file_name, col_index):
return self.find_duplicates(self.get_all_col(file_name, col_index))
# 对指定列 进行缺失值处理
@staticmethod
def deletion(file_name, col_head_name):
path = r'./data/' + file_name + '.xlsx'
df = pd.read_excel(path)
# num_col_type = df[col_head_name].dtype
# if num_col_type == 'int64' or num_col_type == 'float64':
# df[col_head_name] = df[col_head_name].fillna(df[col_head_name].median())
# else:
# df[col_head_name] = df[col_head_name].fillna('缺失')
# 不使用np.nan,也不判断类型了,统一用 “缺失补充”来替代
df[col_head_name] = df[col_head_name].fillna('缺失补充')
df.to_excel(path, index=False)
@staticmethod
def find_duplicates(strs):
result = []
strs_dic = {}
for index, s in enumerate(strs):
if s not in strs_dic:
strs_dic[s] = [index]
else:
strs_dic[s].append(index)
for key in strs_dic:
if len(strs_dic[key]) > 1:
result.append(strs_dic[key])
return result
# 测试
# ee = EXCEL()
# ee.save('ggg', ['表头1', '表头2', '表头3'],
# [['列1-第1行', '列1-第2行', '列1-第3行'], ['列2-第1行', '列2-第2行', '列2-第3行'],
# ['列3-第1行', '列3-第2行', '列3-第3行']])
#
# ee.append_sheet('ggg', 'Sheet2', ['表头1', '表头2', '表头3'])
#
# ee.append_row('ggg', ['表头1', '表头2', '表头3'],
# [['列1-第1行', '列1-第2行'], ['列2-第1行', '列2-第2行'], ['列3-第1行', '列3-第2行']])
#
# ee.append_col('ggg', '表头4', ['列4-第1行', '列4-第2行', '列4-第3行'])
# ee.delete_line('ggg', [2, 3])
# ee.deletion('ggg', '表头4')
#
# print(ee.get_all_line('ggg'))
# print(ee.get_one_line('ggg', 2))
# print(ee.get_all_col('ggg', 3))
# print(ee.get_one_line_col('ggg', 1, 0))
# print(ee.get_row_length('ggg'))
# print(ee.get_col_length('ggg'))
# print(ee.get_repeat_list('ggg',1))
# ee.repeat_count('ggg')
选一个就行了,不要把两份不同的写法混在一起用,两份写法有些是不一样的
最后我想说: