之前up主把资源删掉了,重新找的资源,从P22 01 输出目录下所有文件及文件夹 开始
答案不定时更新(已更新P22-P35),目前我也在跟着学习。课程链接是在https://www.bilibili.com/video/BV1CQ4y1K7MK?p=30
怕大家把答案和视频资源匹配出错,这是那个课件的老师,应该认不错了吧。废话不多说,上代码。
'''----------1、统计文件名中含python的文件------------'''
# def check_python():
# a = []
# c=[]
# sum = 0
# for file in os.scandir():
# if file.is_dir() is False:
# a.append(file.name)
# c.append(file.name.lower())
# print(a,c)
# for i in a:
# b = re.findall('python', i, re.I)
# # if ('python' or 'Python') in b:
# # sum += 1
# if bool(b) is True:
# sum+=1
#
# # for i in a:
# # b = re.findall('python', i,re.I)
# # if len(b) is not 0:
# # sum+=1
#
# print('the document included python is :')
# print(a)
# print('the number of document included python is :' + str(sum))
#
# check_python()
'''-------------2、自动筛选日期和大小的文件--------------'''
# def check(path):
# # zip_file = (glob.glob('C:/Users/Administrator/Desktop/课题汇总/**/*.zip', recursive=True)) # 把zip文件输出保存成列表
# zip_file = (glob.glob(path+'\**\*.zip', recursive=True)) # 把zip文件输出保存成列表
# print(zip_file) #输出路径为 反双斜杠 指具体文件的路径,非文件夹的路径 文件路径=文件夹路径+文件名
# for zip_path in zip_file:
# container = os.stat(zip_path).st_size / 1024 / 1024 # 把文件转化成以M为单位
# container_time = os.stat(zip_path).st_atime # Unix时间戳
# # 1577808000 2019-12-31 24h
# if (container > 100) and (container_time > 1577808000):
# print('-------')
# print(zip_path)
# print('文件容量:', os.stat(zip_path).st_size / 1024 / 1024)
# print('Unix时间戳:', os.stat(zip_path).st_atime)
# print('时间:', datetime.datetime.fromtimestamp(os.stat(zip_path).st_atime))
# check(r'C:\Users\Administrator\Desktop\课题汇总') #书写格式 r'文件路径'
# check(r'F:\\')#转义字符
'''3、批量修改文件名并移动'''
import os
import shutil
import time
'''重命名文件要使用 文件夹路径和文件名,使用os.walk()可以获得'''
# for dirpath, dirnames, files in os.walk(r'C:\Users\Administrator\Desktop\邴栋-毕业课题资料包 - 副本'):
# for file_name in files:
# if '.zip' in file_name:
# print(file_name,dirpath)
# time_inserted=time.ctime(os.stat(os.path.join(dirpath,file_name)).st_atime)
# print(type(time_inserted))
# os.rename(os.path.join(dirpath,file_name),os.path.join(dirpath,'time_inserted'+file_name))
# zip_file_list = (glob.glob(r'C:\Users\Administrator\Desktop\邴栋-毕业课题资料包 - 副本\**\*.zip', recursive=True)) # 把zip文件输出保存成列表
# print(zip_file_list)
# if not os.path.exists('C:\\Users\\Administrator\\Desktop\\邴栋-毕业课题资料包 - 副本\\Abackup'):#创建文件夹
# os.mkdir(r'C:\Users\Administrator\Desktop\邴栋-毕业课题资料包 - 副本\Abackup')
# for zip_file in zip_file_list:#将zip文件放进去
# shutil.copy(zip_file,r'C:\Users\Administrator\Desktop\邴栋-毕业课题资料包 - 副本\Abackup')
'''4、自动打包过期文件并移动到备份文件夹'''
import os
import datetime
from time import strftime
import zipfile
# file_list=[]
# for dirpath,dirname,files in os.walk('C:\\Users\\Administrator\\Desktop\\邴栋-毕业课题资料包 - 副本'):
# for file_name in files:
# file_path = os.path.join(dirpath,file_name)
# if datetime.datetime.fromtimestamp(os.stat(file_path).st_mtime) < datetime.datetime(2020, 2, 18, 13, 58, 0,111111):
# os.rename(file_path,os.path.join(dirpath,time.strftime("%Y-%m-%d")+file_name))
# print(dirpath, file_name)
# file_list.append(os.path.join(dirpath,time.strftime("%Y-%m-%d")+file_name))
# print(file_list)
# if not os.path.exists('C:\\Users\\Administrator\\Desktop\\backup'):
# os.mkdir('C:\\Users\\Administrator\\Desktop\\backup')
# with zipfile.ZipFile('C:\\Users\\Administrator\\Desktop\\backup\\20200518.zip','w')as zipobj:
# for file in file_list:
# zipobj.write(file)
# for file in file_list:
# os.remove(file)
'''5、找到空缺数据位置'''
from openpyxl import load_workbook
# Exercise 5: report the coordinates of every empty cell in one worksheet.
workbook = load_workbook(filename='C:\\Users\\Administrator\\Desktop\\离子浓度记录数据.xlsx')
print(workbook.sheetnames)
sheet = workbook['测试数据']  # look the worksheet up by its title

# Walk the sheet row by row and remember the coordinate (e.g. "B7") of each
# cell that holds no value. An empty cell is reported as None, so test
# identity with `is None`; `== None` can be fooled by objects that override
# __eq__.
space_cell = []
for row in sheet.rows:
    for cell in row:
        if cell.value is None:
            space_cell.append(cell.coordinate)

# Print how many empty cells were found, followed by their coordinates.
print(len(space_cell), space_cell)
'''6、寻找数据并放入新表格'''
from openpyxl import load_workbook
from openpyxl import Workbook
'''新建excel准备存储数据'''
# Exercise 6: copy every row whose column-D value is below 40.5 into a new
# workbook.

# Create the workbook that will receive the filtered rows.
workbook1 = Workbook()
sheet1 = workbook1.active
sheet1.title = '筛选数据'

# Open the source workbook and work on its active sheet.
workbook = load_workbook(filename='C:\\Users\\Administrator\\Desktop\\7月下旬入库表.xlsx')
sheet = workbook.active

# Scan column D and remember the row number of every cell whose value is
# below 40.5. The comparison is numeric: comparing the str() forms orders
# values lexicographically, so 100 would count as "below" 40.5 while 5 would
# not. Cells that cannot be read as a number (header text, blanks, dates) are
# skipped; bool is excluded explicitly because float(True) would succeed.
cells = sheet['D']
SelectRowsNumber = []
for cell in cells:
    if isinstance(cell.value, bool):
        continue
    try:
        numeric_value = float(cell.value)
    except (TypeError, ValueError):
        continue
    if numeric_value < 40.5:
        SelectRowsNumber.append(cell.row)
print(SelectRowsNumber)

# Copy each selected row, cell by cell, into the new sheet.
for i in SelectRowsNumber:
    SelectRows = []  # fresh list per row so earlier rows are not repeated
    for cell in sheet[i]:
        print(cell.value)
        SelectRows.append(cell.value)
        print(SelectRows)
    print(SelectRows)
    sheet1.append(SelectRows)

workbook1.save(filename='C:\\Users\\Administrator\\Desktop\\selset.xlsx')
'''7、省事省力让程序自动美化你的表格'''
from openpyxl import load_workbook
'''打开需要筛选表格,筛选需要数据'''
# Load the source workbook and select the worksheet by its title.
workbook = load_workbook('C:\\Users\\Administrator\\Desktop\\7月下旬入库表.xlsx')
sheet = workbook['7月下旬入库表']
# '''筛选A列,找出列中数值日期在20180722以前的数据所在的 行'''
def SelsctRow(sheetname, col, number, *, book=None):
    """Return the row numbers in one column whose value is below a threshold.

    Args:
        sheetname: Title of the worksheet to scan.
        col: Column key passed to the worksheet, e.g. ``'A'``.
        number: Threshold. When it can be read as a number (``41``,
            ``'40.5'``) cells are compared numerically and cells that are
            not numeric are skipped. Otherwise (e.g. the date-time string
            ``'2018-07-22 00:00:00'``) the ``str()`` forms of cell value and
            threshold are compared, as the original implementation did.
        book: Object indexed as ``book[sheetname][col]`` to obtain the cells.
            Defaults to the module-level ``workbook``.

    Returns:
        A list with the ``row`` attribute of every matching cell, in column
        order.
    """

    def as_number(value):
        # float(True) would succeed, so rule out bool before converting.
        if isinstance(value, bool):
            return None
        try:
            return float(value)
        except (TypeError, ValueError):
            return None

    if book is None:
        book = workbook

    limit = as_number(number)
    selected_rows = []
    for cell in book[sheetname][col]:
        if limit is None:
            # Non-numeric threshold: keep the text comparison, which orders
            # 'YYYY-MM-DD HH:MM:SS' strings chronologically.
            is_below = str(cell.value) < str(number)
        else:
            # Numeric threshold: comparing strings would rank '100' below
            # '40.5' and '5' above it, so compare the numbers themselves.
            cell_number = as_number(cell.value)
            is_below = cell_number is not None and cell_number < limit
        if is_below:
            selected_rows.append(cell.row)
    return selected_rows
# First pass: rows of the source sheet whose column-A value compares below
# '2018-07-22 00:00:00'.
FirstSelectRowsNumber = SelsctRow(
    sheetname='7月下旬入库表',
    col='A',
    number='2018-07-22 00:00:00',
)
print(FirstSelectRowsNumber)
'''针对筛选出的行 根据E列数值低于41再次筛选 行'''
def TwiceSelectRow(firstSelectRowList, col, number, *, source_sheet=None):
    """Filter an existing row selection by a second column's value.

    Args:
        firstSelectRowList: Row numbers kept by an earlier filter.
        col: Column letter to test; the cell is looked up as
            ``col + str(row)``, e.g. ``'E12'``.
        number: Threshold. When it can be read as a number, cells are
            compared numerically and non-numeric cells are skipped.
            Otherwise the ``str()`` forms are compared, as the original
            implementation did.
        source_sheet: Object indexed by coordinate string to obtain a cell.
            Defaults to the module-level ``sheet``.

    Returns:
        A list with the ``row`` attribute of every cell below the threshold,
        in the order of ``firstSelectRowList``.
    """

    def as_number(value):
        # float(True) would succeed, so rule out bool before converting.
        if isinstance(value, bool):
            return None
        try:
            return float(value)
        except (TypeError, ValueError):
            return None

    if source_sheet is None:
        source_sheet = sheet

    limit = as_number(number)
    selected_rows = []
    for i in firstSelectRowList:
        cell = source_sheet[col + str(i)]  # fetch once, use for value and row
        if limit is None:
            is_below = str(cell.value) < str(number)
        else:
            # Compare numbers, not their text: as strings '100' sorts below
            # '41' and '5' sorts above it.
            cell_number = as_number(cell.value)
            is_below = cell_number is not None and cell_number < limit
        if is_below:
            selected_rows.append(cell.row)
    return selected_rows
# Second pass: of the rows kept by the first pass, retain those whose
# column-E value compares below 41.
FinalSelectRowsNumber = TwiceSelectRow(
    firstSelectRowList=FirstSelectRowsNumber,
    col='E',
    number=41,
)
print(FinalSelectRowsNumber)
'''将最终数据转移至新表'''
# Add a worksheet titled '表1' to the workbook, then fetch it by that title;
# it receives the rows that passed both filters.
workbook.create_sheet(title='表1')
sheet1 = workbook['表1']
def SelectRowToNew_sheet(SelectList, NewSheet, *, source_sheet=None):
    """Append the cell values of the selected rows to another sheet.

    Args:
        SelectList: Row numbers to copy, in the order they should be
            appended.
        NewSheet: Destination; each row is passed to ``NewSheet.append`` as a
            list of cell values.
        source_sheet: Object indexed by row number to obtain that row's
            cells. Defaults to the module-level ``sheet``.

    Returns:
        None. The destination is modified in place.
    """
    if source_sheet is None:
        source_sheet = sheet
    for i in SelectList:
        # A new list is built for each row, so rows never share or
        # accumulate values.
        NewSheet.append([cell.value for cell in source_sheet[i]])
# Copy the rows that passed both filters into the new sheet.
SelectRowToNew_sheet(FinalSelectRowsNumber, sheet1)

# Highlight every row of the new sheet that SelsctRow selects for column E
# with threshold 40.5: solid fill 'FFFF0000' (opaque red) and font colour
# 'FFFFFFFF' (opaque white).
from openpyxl.styles import Font,PatternFill
pattern_fill = PatternFill(fill_type='solid', fgColor='FFFF0000')
font = Font(color='FFFFFFFF')
ColorSelectRow = SelsctRow('表1', 'E', '40.5')
print(ColorSelectRow)
for i in ColorSelectRow:
    for cell in sheet1[i]:  # the new sheet, not the source sheet
        cell.fill = pattern_fill
        cell.font = font

# The source sheet is removed from the workbook right here, so blanking its
# unselected rows first (a delete_rows/insert_rows pair for each of rows
# 2..1846) had no effect on the saved file. That slow loop, with its
# hard-coded row count, has been dropped.
workbook.remove(sheet)
sheet1.title = '筛选'

# NOTE(review): this overwrites the input file, leaving only the filtered
# sheet - confirm that replacing the source data is intended.
workbook.save(filename='C:\\Users\\Administrator\\Desktop\\7月下旬入库表.xlsx')
import PyPDF2
import pdfplumber
from openpyxl import Workbook
# Exercise 8: extract one table from a PDF page and store it in an Excel file.

# Output workbook: the default sheet is renamed 'No13data' (nothing is
# written to it here) and a second sheet 'No14data' receives the table.
workbook = Workbook()
sheet = workbook.active
sheet.title = 'No13data'
workbook.create_sheet('No14data')
sheet1 = workbook['No14data']

with pdfplumber.open('C:\\Users\\Administrator\\Desktop\\网易2019第二季度财报.pdf') as pdf:
    table_page = pdf.pages[13]  # zero-based index, i.e. the 14th page
    # Extract a single table, using the "text" strategy for both the
    # vertical and the horizontal boundaries.
    table = table_page.extract_table(
        table_settings={
            "vertical_strategy": "text",
            "horizontal_strategy": "text",
        }
    )
print(table)

# extract_table() yields None when it finds no table on the page; treat that
# as an empty table instead of failing on iteration.
for row in table or []:
    # Skip rows in which every cell is None. The original compared the
    # joined text against 'NoneNoneNone' / eight 'None's, which only caught
    # empty rows of exactly 3 or 8 cells.
    if all(item is None for item in row):
        continue
    # Merge the first five cells into one string (empty cells contribute
    # nothing), then keep the remaining cells that are not None.
    new_row = [''.join(str(item) if item else '' for item in row[0:5])]
    new_row.extend(item for item in row[5:] if item is not None)
    sheet1.append(new_row)

workbook.save(filename='C:\\Users\\Administrator\\Desktop\\网易2019第二季度财报.xlsx')
'''9、分割合并排序PDF文件'''
from PyPDF2 import PdfFileReader,PdfFileWriter
# Pass 1: keep every other page of the report, starting with the first one
# (zero-based indices 0, 2, 4, ...), and write them to reordered.pdf.
pdf_reader = PdfFileReader('C:\\Users\\Administrator\\Desktop\\网易2019第二季度财报.pdf')
pdf_writer = PdfFileWriter()
for page in range(0, pdf_reader.getNumPages(), 2):
    pdf_writer.addPage(pdf_reader.getPage(page))
with open('C:\\Users\\Administrator\\Desktop\\reordered.pdf', 'wb') as out:
    pdf_writer.write(out)

# Pass 2: read reordered.pdf back and rewrite it with its pages in reverse
# order (last page first).
pdf_reader = PdfFileReader('C:\\Users\\Administrator\\Desktop\\reordered.pdf')
pdf_writer = PdfFileWriter()
for page in reversed(range(pdf_reader.getNumPages())):
    pdf_writer.addPage(pdf_reader.getPage(page))
with open('C:\\Users\\Administrator\\Desktop\\reordered.pdf', 'wb') as out:
    pdf_writer.write(out)
'''10、加水印并加密pdf'''
from PyPDF2 import PdfFileReader,PdfFileWriter
from copy import copy
# The watermark is the first page of watermark.pdf.
watermark_pdf = PdfFileReader('C:\\Users\\Administrator\\Desktop\\watermark.pdf')
watermark_page = watermark_pdf.getPage(0)

pdf_reader = PdfFileReader('C:\\Users\\Administrator\\Desktop\\reordered.pdf')
pdf_writer = PdfFileWriter()

# For every page of the document: start from a copy of the watermark page
# (so the shared watermark_page name is never merged into directly), merge
# the document page into that copy, and queue the result for output.
for page in range(pdf_reader.getNumPages()):
    new_page = copy(watermark_page)
    original_page = pdf_reader.getPage(page)
    new_page.mergePage(original_page)
    pdf_writer.addPage(new_page)

# Encrypt the output with the password '1', then write it to disk.
pdf_writer.encrypt('1')
with open('C:\\Users\\Administrator\\Desktop\\encrypted.pdf', 'wb') as out:
    pdf_writer.write(out)
'''11、word文档内某一内容出现的次数'''
from docx import Document
import re
# Exercise 11: count case-insensitive occurrences of 'file' in a Word document.
doc = Document('C:\\Users\\Administrator\\Desktop\\编程代码.docx')
num = 0

# Iterate over the paragraphs directly instead of indexing doc.paragraphs
# once per position.
for paragraph in doc.paragraphs:
    # Search the whole paragraph text rather than each run separately: a word
    # can be split across several runs (e.g. by a formatting change), and a
    # per-run search would miss it.
    # NOTE(review): only body paragraphs are scanned; text inside tables is
    # not reached through doc.paragraphs - confirm whether it should count.
    matches = re.findall('file', paragraph.text, re.I)
    if matches:
        print(paragraph.text)
        print(matches)
        num += len(matches)
        print('-----------------')

# The adjacent string literals concatenate, so this prints
# "number of file is <count>".
print('number of ''file'' is '+str(num))