Python 异步编程:async def

不使用异步的代码

# # -*- coding: utf-8 -*-
import json
import os
from urllib import request
import csv
import sys
import argparse


# Synchronous downloader: for every CSV in --csv_dir, write each accepted
# row's annotation JSON to <save_dir>/labels_json and fetch its image into
# <save_dir>/images, one request at a time.
parser = argparse.ArgumentParser()
parser.add_argument('--csv_dir', default='tmp/csv', type=str,
                    help="directory containing the annotation CSV files to process")
parser.add_argument('--save_dir', default='tmp/save_1', type=str,
                    help="output root; images are saved to ./images and labels to ./labels_json")

arg = parser.parse_args()

# Raise the csv field size limit to the maximum (presumably because the JSON
# column can be larger than the csv module's default limit).
csv.field_size_limit(sys.maxsize)
csv_dir = arg.csv_dir
csv_all = os.listdir(csv_dir)
save_dir = arg.save_dir
save_images_path = os.path.join(save_dir, "images")
save_labels_path = os.path.join(save_dir, "labels_json")

# makedirs also creates save_dir itself when it is missing, where os.mkdir
# would raise FileNotFoundError; exist_ok makes re-runs safe.
os.makedirs(save_images_path, exist_ok=True)
os.makedirs(save_labels_path, exist_ok=True)

for csv_name in csv_all:
    # Files are read as UTF-8; the original note suggests 'gb2312' as the
    # alternative encoding for some exports.
    # newline='' is what the csv module expects for files it parses itself.
    with open(os.path.join(csv_dir, csv_name), encoding='UTF8', newline='') as f:
        csv_reader = csv.reader(f)
        # The first row is treated as the header and skipped. The counter keeps
        # the original meaning: header row (if any) + rows downloaded successfully.
        i = 0 if next(csv_reader, None) is None else 1
        for line in csv_reader:
            # Blank or truncated rows have no URL/JSON columns to read.
            if len(line) < 4:
                continue
            jpg_url = line[2]
            jpg_name = jpg_url.split('/')[-1]
            if not jpg_name.endswith('jpg'):
                continue
            json_result = json.loads(line[3])
            # Rows whose first info entry is 'No' are skipped entirely.
            if json_result['info'][0]['value'] == 'No':
                continue
            # Label file is named after the image name up to its first dot.
            label_path = os.path.join(save_labels_path, jpg_name.split('.')[0] + '.json')
            with open(label_path, 'w', encoding='utf-8') as ftxt:
                json.dump(json_result, ftxt, ensure_ascii=False)
            try:
                request.urlretrieve(jpg_url, os.path.join(save_images_path, jpg_name))
            except Exception as err:
                # Best effort: report the failed image and move on. Catching
                # Exception (not a bare except) keeps Ctrl-C working.
                print("error image", jpg_name, err)
                continue
            i = i + 1
    print(i)

使用异步编程,可以同时下载多张图片,但是会比上边的方式少下载一些图片,具体原因还未排查。

# # -*- coding: utf-8 -*-
import json
import os
import csv
import sys
import argparse
import asyncio
import requests_async as requests

# Command-line options and derived paths shared by the async downloader below.
parser = argparse.ArgumentParser()
parser.add_argument('--csv_dir', default='tmp/csv', type=str,
                    help="directory containing the annotation CSV files to process")

parser.add_argument('--save_dir', default='tmp/save_1', type=str,
                    help="output root; images are saved to ./images and labels to ./labels_json")


arg = parser.parse_args()

# Raise the csv field size limit to the maximum (presumably because the JSON
# column can be larger than the csv module's default limit).
csv.field_size_limit(sys.maxsize)
csv_dir = arg.csv_dir
csv_all = os.listdir(csv_dir)  # every entry in csv_dir is later opened as a CSV file
save_dir = arg.save_dir
save_images_path = os.path.join(save_dir, "images")
save_labels_path = os.path.join(save_dir, "labels_json")
async def download_images_and_save2file(page, inx, jpg_name, image_url):
    """Download one image and write it under ``save_images_path``.

    Any failure is printed and swallowed so that a single bad URL does not
    abort the other downloads awaited together with this one.

    Args:
        page: Batch number; used only in the progress message.
        inx: Position within the batch; used only in the progress message.
        jpg_name: File name to write inside ``save_images_path``.
        image_url: URL the image is fetched from.
    """
    try:
        response = await requests.get(image_url)
        # Without this check the body of an HTTP error response (404/500 page)
        # would be written to disk as if it were the image.
        response.raise_for_status()

        with open(os.path.join(save_images_path, jpg_name), "wb") as fp:
            fp.write(response.content)
        print(f"page: {page}, inx: {inx}, url: {image_url} 完成!")

    except Exception as e:
        # Include the URL so a missing image can be traced back to its request.
        print(f"e: {e}, url: {image_url}")

async def download_imgs(batch_size=50):
    """Write label JSON files and download images for every CSV in ``csv_dir``.

    Rows are queued into batches of ``batch_size`` downloads that run
    concurrently. Every accepted row is queued, and the final partial batch
    of each file is awaited as well, so no image is dropped at a batch
    boundary or at the end of a file.

    Args:
        batch_size: Number of downloads awaited together (default 50).
            Values below 1 behave like 1.
    """
    # makedirs also creates save_dir itself when it is missing, where os.mkdir
    # would raise FileNotFoundError; exist_ok makes re-runs safe.
    os.makedirs(save_images_path, exist_ok=True)
    os.makedirs(save_labels_path, exist_ok=True)
    # The limit is global to the csv module, so set it once, not per file.
    csv.field_size_limit(sys.maxsize)

    for csv_name in csv_all:
        page = 1
        await_list = []
        # Files are read as UTF-8; the original note suggests 'gb2312' as the
        # alternative encoding. newline='' is what the csv module expects.
        with open(os.path.join(csv_dir, csv_name), encoding='UTF8', newline='') as f:
            for line in csv.reader(f):
                # Skip the header row, and blank/truncated rows that have no
                # URL/JSON columns to read.
                if "data_id" in line or len(line) < 4:
                    continue
                jpg_url = line[2]
                jpg_name = jpg_url.split('/')[-1]
                json_result = json.loads(line[3])

                # Rows whose first info entry is 'No' are skipped entirely.
                if json_result['info'][0]['value'] == 'No':
                    continue
                # Label file is named after the image name up to its first dot.
                label_path = os.path.join(save_labels_path, jpg_name.split('.')[0] + '.json')
                with open(label_path, 'w', encoding='utf-8') as ftxt:
                    json.dump(json_result, ftxt, ensure_ascii=False)

                # Queue the row first, then flush when the batch is full. The
                # previous version flushed *instead of* queueing, which lost
                # the row that triggered the flush.
                inx = len(await_list) + 1
                print(f"page: {page}, inx: {inx} 进行中...")
                await_list.append(download_images_and_save2file(page=page, inx=inx, jpg_name=jpg_name, image_url=jpg_url))
                if len(await_list) >= batch_size:
                    await asyncio.gather(*await_list)
                    await_list = []
                    page += 1

        # Flush the last partial batch; previously these coroutines were
        # created but never awaited, so their images were never downloaded.
        if await_list:
            await asyncio.gather(*await_list)

# Start the async download only when this file is executed as a script,
# not when it is imported.
if __name__ == '__main__':
    asyncio.run(download_imgs())

你可能感兴趣的:(python)