# Copied from another source.
import requests
from bs4 import BeautifulSoup
import time
import json
import os
import socket
# Set a process-wide default timeout (in seconds) for newly created sockets so
# that a single request cannot stall the program for a long time.
# NOTE(review): confirm that the `requests` calls in this file actually honor
# this default when no explicit `timeout=` argument is passed.
socket.setdefaulttimeout(8)
def sougou_pic_url(num, keyword):
    """Collect image URLs from Sogou picture search for ``keyword``.

    Requests ``num // 48 + 1`` result pages from the Sogou AJAX endpoint and
    gathers the ``pic_url`` field of every returned item.

    Args:
        num: Number of images the caller wants. It is only used to decide
            how many result pages are requested.
        keyword: Search term; it is URL-encoded before being sent.

    Returns:
        A list of image URLs in the order they were returned, with repeated
        URLs dropped. The list may hold more or fewer than ``num`` entries.

    Raises:
        requests.RequestException: If a page request fails, times out or
            answers with an HTTP error status.
        ValueError: If a response body is not valid JSON.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36'}
    pic_url = []
    seen = set()
    for i in range((num // 48) + 1):
        # NOTE(review): the offset step (47) differs from the page size the
        # page count assumes (48). The step is kept as-is; duplicates are
        # filtered below in case consecutive pages overlap.
        params = {
            'query': keyword,
            'mode': 1,
            'start': i * 47,
            'reqType': 'ajax',
            'reqFrom': 'result',
            'tn': 0,
        }
        # Passing `params` lets requests escape the keyword, and the
        # User-Agent header is actually sent with the request.
        resp = requests.get('https://pic.sogou.com/pics', params=params,
                            headers=headers, timeout=8)
        resp.raise_for_status()
        items = json.loads(resp.text).get('items') or []
        for item in items:
            link = item.get('pic_url')
            if link and link not in seen:
                seen.add(link)
                pic_url.append(link)
    return pic_url
def down_img(num, keyword, save_root='D:/p_images/'):
    """Download up to ``num`` images for ``keyword`` into ``save_root/keyword``.

    The image URLs come from ``sougou_pic_url``. Every image is stored as
    ``<index>.png`` (the ``.png`` suffix is used regardless of the real image
    format), where ``index`` is the position of the URL in the result list.
    URLs that cannot be downloaded are reported and skipped, and the loop
    stops once ``num`` images have been saved.

    Args:
        num: Maximum number of images to save. Nothing is done when
            ``num <= 0``.
        keyword: Search term; also used as the name of the sub-folder.
        save_root: Folder under which the keyword sub-folder is created.

    Returns:
        None.
    """
    if num <= 0:
        return

    pic_url = sougou_pic_url(num, keyword)
    target_dir = os.path.join(save_root, keyword)
    os.makedirs(target_dir, exist_ok=True)

    saved = 0
    for index, url in enumerate(pic_url):
        filename = '{}/{}.png'.format(target_dir, index)
        try:
            # Fetch first so that a failed request does not leave an empty
            # file behind.
            resp = requests.get(url, timeout=8)
            resp.raise_for_status()
            with open(filename, 'wb') as f:
                f.write(resp.content)
        except (requests.RequestException, OSError) as err:
            print('下载失败,已跳过: {} ({})'.format(url, err))
            continue
        print(filename)
        saved += 1
        if saved >= num:
            break
if __name__ == '__main__':
    # Simple text menu: 1 searches and downloads images, 2 quits.
    while True:
        print("1.搜索图片")
        print("2.退出程序")
        print("提示:图片默认存储路径为 D:/p_images/")
        try:
            choose = int(input("请选择:"))
        except ValueError:
            # Non-numeric input is treated like any other invalid choice
            # instead of crashing the program.
            print("输入有误,请重新输入!")
            continue
        if choose == 1:
            keyword = input('请输入图片关键词:')
            try:
                num = int(input('请输入爬取图片数目:'))
            except ValueError:
                print("输入有误,请重新输入!")
                continue
            down_img(num, keyword)
        elif choose == 2:
            break
        else:
            print("输入有误,请重新输入!")
import imghdr
import os
def delect_webp_and_none_type(path):
    """Delete every file under ``path`` that is WebP or of unrecognised type.

    Walks ``path`` recursively and asks ``imghdr.what`` for the image type of
    each file. A file is printed and then removed when the reported type is
    ``"webp"`` or when no type could be determined (``None``).

    Args:
        path: Root folder to clean up.
    """
    unwanted_types = ("webp", None)
    for folder, _subfolders, filenames in os.walk(path):
        for filename in filenames:
            candidate = os.path.join(folder, filename)
            if imghdr.what(candidate) not in unwanted_types:
                continue
            print(candidate)
            os.remove(candidate)
if __name__ == "__main__":
    # Raw string: the backslashes are Windows path separators, not escape
    # sequences ("\p" is not a valid escape and triggers a warning on recent
    # Python versions). The resulting path is unchanged.
    delect_webp_and_none_type(r"D:\p_images\交通路标")
import os
import cv2
def resizeImg(w, h, imgs_floder, save_folder):
    """Resize every image in ``imgs_floder`` to ``w`` x ``h`` pixels.

    Each readable image is resized with ``cv2.resize`` and written to
    ``save_folder`` under its original file name. Sub-directories and files
    that OpenCV cannot decode are skipped instead of aborting the whole run.

    Args:
        w: Target width in pixels.
        h: Target height in pixels.
        imgs_floder: Folder containing the source images.
        save_folder: Folder that receives the resized images; it is created
            when it does not exist yet.

    Returns:
        Always ``0``.
    """
    # cv2.imwrite does not create missing directories, so make sure the
    # destination exists before writing anything.
    os.makedirs(save_folder, exist_ok=True)
    for img in os.listdir(imgs_floder):
        img_full_path = os.path.join(imgs_floder, img)
        if not os.path.isfile(img_full_path):
            continue
        ori_img = cv2.imread(img_full_path)
        if ori_img is None:
            # imread signals "could not read/decode" by returning None.
            print('skipped (not a readable image): ' + img)
            continue
        img_r = cv2.resize(ori_img, (w, h))
        print(img)
        cv2.imwrite(os.path.join(save_folder, img), img_r)
    return 0
if __name__ == '__main__':
    print('----进行照片处理以及标签转换----')
    # w and h need to be multiples of 16 (w % 16 == 0 and h % 16 == 0).
    # Raw strings keep the Windows backslashes literal; "\B", "\l" and "\i"
    # are not valid escape sequences. The resulting paths are unchanged.
    resizeImg(800, 560, r"D:\BaiduNetdiskDownload\l01000~01999", r"D:\BaiduNetdiskDownload\image2")
    print('---处理完成---')
import os
import cv2
import shutil
def resizeImg(imgs_floder, save_folder):
    """Copy every PNG that has a same-named JSON file, together with that JSON.

    Despite its name this function does not resize anything: a ``.png`` file
    is read with OpenCV and written again to ``save_folder`` only when a
    ``.json`` file with the same base name exists in ``imgs_floder``; that
    JSON file is then copied next to it. All other files are ignored.

    Args:
        imgs_floder: Folder containing the ``.png`` / ``.json`` files.
        save_folder: Destination folder; created when it does not exist yet.

    Returns:
        Always ``0``.
    """
    os.makedirs(save_folder, exist_ok=True)
    names = os.listdir(imgs_floder)

    # os.path.splitext copes with names that have no extension or several
    # dots, which a plain split('.') does not.
    json_stems = set()
    for name in names:
        stem, ext = os.path.splitext(name)
        if ext == '.json':
            json_stems.add(stem)

    for name in names:
        stem, ext = os.path.splitext(name)
        if ext != '.png' or stem not in json_stems:
            continue
        ori_img = cv2.imread(os.path.join(imgs_floder, name))
        if ori_img is None:
            # imread signals "could not read/decode" by returning None.
            print('skipped (not a readable image): ' + name)
            continue
        print(name)
        cv2.imwrite(os.path.join(save_folder, name), ori_img)
        json_name = stem + '.json'
        shutil.copyfile(os.path.join(imgs_floder, json_name),
                        os.path.join(save_folder, json_name))
    return 0
if __name__ == '__main__':
    print('----start out----')
    # Process the test folder and write the results into the second folder.
    source_dir, output_dir = "D:/p_images/test", "D:/p_images/test2"
    resizeImg(source_dir, output_dir)
    print('---处理完成---')