1.多线程
''' 进程: 对于操作系统来说,一个任务就是一个进程(Process),比如打开一个浏览器就是启动一个浏览器进程, 打开一个记事本就启动了一个记事本进程,打开两个记事本就启动了两个记事本进程,打开一个Word就启动了 一个Word进程。进程是很多资源的集合 进程和进程之间是相互独立的 线程 线程和线程之间都是独立的。 进程里面本身就有一个线程,这个线程叫做主线程。
python GIL 全局解释器锁,导致python的多线程利用不了多核cpu,但是多进程可以。 什么时候使用多进程: CPU密集型任务:多进程 如排序或计算的时候 什么时候使用多线程: IO密集型任务:多线程 input output(频繁的网络读写,频繁的磁盘读写) 进程相当于是一个工厂,而线程就相当于是一个员工,工厂若想运作必须至少有一个员工(一个进程默认有一个线程,主线程) '''
import threading
import time


def lajifenlei():
    """Simulate a slow task: block for two seconds, then print a label."""
    # Stand-in for the running time of the real work (assumed to be 2s).
    simulated_seconds = 2
    time.sleep(simulated_seconds)
    print('干垃圾')
# 1. Baseline without threads: time ten back-to-back calls of the function.
began = time.time()
for _ in range(10):
    lajifenlei()
elapsed = time.time() - began
print('不用多线程时调用10次方法总耗时:%s' % elapsed)
测试结果
# 2. Threaded version, attempt 1: start ten workers and stop the clock at once.
began = time.time()
for _ in range(10):
    threading.Thread(target=lajifenlei).start()
elapsed = time.time() - began
# Nothing waits for the workers here, so this is only the time the main
# thread needed to start them, not the time the work itself took.
print('用多线程时调用10次方法总耗时:%s' % elapsed)
测试结果:
# 2. Threaded version, attempt 2: measure the real elapsed time.
start_time = time.time()
workers = []
for n in range(10):  # start 10 worker threads
    xiaohei = threading.Thread(target=lajifenlei)
    xiaohei.start()
    workers.append(xiaohei)
# Wait for every worker. join() blocks without spinning, so the main thread
# does not burn CPU in a polling loop, and the wait does not depend on the
# main thread being the only other thread alive in the process.
for worker in workers:
    worker.join()
end_time = time.time()
# The main thread waited for all workers, so this covers the whole run.
print('用多线程时调用10次方法总耗时:%s' % (end_time - start_time))
测试结果:
# Threads with arguments
# 1. One argument
import threading
import time


def lajifenlei(name):
    """Pretend to work for two seconds, then print a line naming the worker.

    Args:
        name: Worker name substituted into the printed message.
    """
    time.sleep(2)  # stand-in for the running time of the real work (assumed 2s)
    print('我的名字叫%s,我的工作是垃圾分类' % name)


start_time = time.time()
workers = []
for n in range(10):  # start 10 worker threads
    # args is a tuple, hence the trailing comma for a single argument.
    xiaohei = threading.Thread(target=lajifenlei, args=('xiaoming',))
    xiaohei.start()
    workers.append(xiaohei)
# Wait for every worker with join() instead of polling active_count():
# no CPU is wasted spinning, and unrelated threads cannot stall the wait.
for worker in workers:
    worker.join()
end_time = time.time()
# The main thread waited for all workers, so this covers the whole run.
print('用多线程时调用10次方法总耗时:%s' % (end_time - start_time))
测试结果:
# 2. Several arguments
import threading
import time


def lajifenlei(name, sex):
    """Pretend to work for two seconds, then print a line describing the worker.

    Args:
        name: Worker name substituted into the printed message.
        sex: Worker gender substituted into the printed message.
    """
    time.sleep(2)  # stand-in for the running time of the real work (assumed 2s)
    print('我的名字叫%s,我是一个%s生,我的工作是垃圾分类' % (name, sex))


start_time = time.time()
workers = []
for n in range(10):  # start 10 worker threads
    xiaohei = threading.Thread(target=lajifenlei, args=('xiaoming', '女'))
    xiaohei.start()
    workers.append(xiaohei)
# Wait for every worker with join() instead of polling active_count():
# no CPU is wasted spinning, and unrelated threads cannot stall the wait.
for worker in workers:
    worker.join()
end_time = time.time()
# The main thread waited for all workers, so this covers the whole run.
print('用多线程时调用10次方法总耗时:%s' % (end_time - start_time))
# Test result:
# 2. Several arguments, taken from a list
import threading
import time


def lajifenlei(name, sex):
    """Pretend to work for two seconds, then print a line describing the worker.

    Args:
        name: Worker name substituted into the printed message.
        sex: Worker gender substituted into the printed message.
    """
    time.sleep(2)  # stand-in for the running time of the real work (assumed 2s)
    print('我的名字叫%s,我是一个%s生,我的工作是垃圾分类' % (name, sex))


start_time = time.time()
# One (name, sex) tuple per thread. The variable is not called "list" so the
# builtin of that name stays usable.
people = [
    ('xiaoming1', '男'),
    ('xiaoming2', '女'),
    ('xiaoming3', '女'),
    ('xiaoming4', '女'),
    ('xiaoming5', '男'),
]
workers = []
for person in people:  # one worker thread per entry (5 here)
    xiaohei = threading.Thread(target=lajifenlei, args=person)
    xiaohei.start()
    workers.append(xiaohei)
# Wait for every worker with join() instead of polling active_count():
# no CPU is wasted spinning, and unrelated threads cannot stall the wait.
for worker in workers:
    worker.join()
end_time = time.time()
# Report the real number of calls rather than a hard-coded 10.
print('用多线程时调用%s次方法总耗时:%s' % (len(people), end_time - start_time))
# Test result:
# A thread's (or process's) target cannot hand its return value back to the
# caller. To collect results, define a global container and have the function
# store its result there.
import threading
import time

# Filled in by the worker threads. It is kept separate from the input data so
# the printed result holds only what the workers produced.
results = []


def lajifenlei(name, sex):
    """Pretend to work for two seconds, then record a line in ``results``.

    Args:
        name: Worker name substituted into the recorded message.
        sex: Worker gender substituted into the recorded message.
    """
    time.sleep(2)  # stand-in for the running time of the real work (assumed 2s)
    results.append('我的名字叫%s,我是一个%s生,我的工作是垃圾分类' % (name, sex))


start_time = time.time()
# One (name, sex) tuple per thread. The variable is not called "list" so the
# builtin of that name stays usable.
people = [
    ('xiaoming1', '男'),
    ('xiaoming2', '女'),
    ('xiaoming3', '女'),
    ('xiaoming4', '女'),
    ('xiaoming5', '男'),
]
workers = []
for person in people:  # one worker thread per entry (5 here)
    xiaohei = threading.Thread(target=lajifenlei, args=person)
    xiaohei.start()
    workers.append(xiaohei)
# Wait for every worker with join() instead of polling active_count():
# no CPU is wasted spinning, and unrelated threads cannot stall the wait.
for worker in workers:
    worker.join()
end_time = time.time()
# Entries appear in the order the threads finished, which may vary per run.
print('list的值为:%s' % results)
# Report the real number of calls rather than a hard-coded 10.
print('用多线程时调用%s次方法总耗时:%s' % (len(people), end_time - start_time))
# Test result:
多线程下载文件案例
import hashlib
import os

import requests


def down_load_file(url):
    """Download ``url`` and save the body as ``img/<md5 of url>.jpg``.

    Args:
        url: Address of the file to fetch.

    Raises:
        requests.RequestException: If the request times out, fails, or the
            server answers with an error status.
    """
    # A timeout keeps a stalled server from hanging the worker forever, and
    # raise_for_status() avoids saving an error page as if it were an image.
    r = requests.get(url, timeout=30)
    r.raise_for_status()
    # The file name is the MD5 hex digest of the URL.
    m = hashlib.md5(url.encode())
    file_name = m.hexdigest()
    print('正在下载%s' % file_name)
    # open() does not create missing directories, so make sure img/ exists.
    os.makedirs('img', exist_ok=True)
    with open('img/%s.jpg' % file_name, 'wb') as fw:
        fw.write(r.content)


url_list = [
    'https://timgsa.baidu.com/timg?image&quality=80&size=b9999_10000&sec=1564282263&di=9d1edb5e67c65051336a4ed2c7c5f56a&imgtype=jpg&er=1&src=http%3A%2F%2Fku.90sjimg.com%2Felement_origin_min_pic%2F18%2F03%2F27%2F1d7b7b1c20e2013963d677003e587421.jpg%21%2Ffwfh%2F804x804%2Fquality%2F90%2Funsharp%2Ftrue%2Fcompress%2Ftrue',
    'https://ss1.bdstatic.com/70cFuXSh_Q1YnxGkpoWK1HF6hhy/it/u=1693892888,2363320737&fm=26&gp=0.jpg',
    'https://timgsa.baidu.com/timg?image&quality=80&size=b9999_10000&sec=1563687763106&di=256bf12a6a322a5d161d9c430605eafa&imgtype=0&src=http%3A%2F%2Fhbimg.b0.upaiyun.com%2F02360ab0bb74f7a353a3d121d948d1e497b54dc61db49-bX40Ok_fw658',
]
start_time = time.time()
workers = []
for url in url_list:
    t = threading.Thread(target=down_load_file, args=(url,))
    t.start()  # one download thread per URL
    workers.append(t)
# Wait for every download with join() instead of polling active_count():
# no CPU is wasted spinning, and unrelated threads cannot stall the wait.
for worker in workers:
    worker.join()
end_time = time.time()
print('多线程耗时:', end_time - start_time)
测试结果:
2.线程锁(如果大家都同时修改同一个值,那么这个值需要加上锁)
import threading
import time

count = 0
lock = threading.Lock()  # one lock shared by every thread that changes count


def lajifenlei():
    """Add one to the shared counter while holding the lock."""
    global count
    # "with" acquires the lock and always releases it, even if the body raises.
    with lock:
        count += 1


workers = []
for n in range(1000):
    # Pass the function itself as the target. Writing lajifenlei() would run
    # it here in the main thread and hand its return value (None) to Thread.
    th = threading.Thread(target=lajifenlei)
    th.start()
    workers.append(th)
# Wait until every thread has done its increment before reading the total.
for th in workers:
    th.join()
print('1000个人加完1后count值为:%s' % count)
# Test result:
# 1000个人加完1后count值为:1000
3.守护线程
def lajifenlei():
    """Simulate a slow task: block for two seconds, then print a label."""
    time.sleep(2)
    print('干垃圾')


for i in range(10):
    # daemon=True marks the worker as a daemon thread: the program does not
    # wait for it, and it is abandoned when the main thread exits, whether or
    # not its work is finished. The keyword replaces the deprecated
    # setDaemon(True) call.
    syy = threading.Thread(target=lajifenlei, daemon=True)
    syy.start()
print('完成')
# Test result:
# 完成
4.多进程
import multiprocessing
import time


def lajifenlei():
    """Simulate a slow task: block for two seconds, then print a label."""
    time.sleep(2)
    print('干垃圾')


# The guard is required when starting processes: a child that re-imports this
# module must not start processes of its own.
if __name__ == '__main__':
    # Start the clock before launching, so that the launch time is counted,
    # as in the thread examples.
    start_time = time.time()
    children = []
    for n in range(10):
        mp = multiprocessing.Process(target=lajifenlei)
        mp.start()
        print('进程%s已启动' % mp.pid)
        children.append(mp)
    print(multiprocessing.active_children())
    # Wait for the child processes with join() instead of polling
    # active_children() in a loop that keeps one CPU core busy.
    for child in children:
        child.join()
    end_time = time.time()
    print('耗时:%s' % (end_time - start_time))
测试结果:
import multiprocessing
import threading
import time


def sayHello(name):
    """Wait twenty seconds, then print a greeting.

    Args:
        name: Name substituted into the printed greeting.
    """
    time.sleep(20)
    print('%s你好呀!' % name)


def lajifenlei():
    """Start five greeting threads and return without waiting for them."""
    print('干垃圾start')
    for n in range(5):
        th = threading.Thread(target=sayHello, args=('wxytest_%s' % n,))
        th.start()
    # Printed right after the threads are started; they are not joined here.
    print('干垃圾end')


# The guard is required when starting processes: a child that re-imports this
# module must not start processes of its own.
if __name__ == '__main__':
    # Start the clock before launching, so that the launch time is counted,
    # as in the thread examples.
    start_time = time.time()
    children = []
    for n in range(2):
        mp = multiprocessing.Process(target=lajifenlei)
        mp.start()
        print('进程%s已启动' % mp.pid)
        children.append(mp)
    print(multiprocessing.active_children())
    # Wait for the child processes with join() instead of polling
    # active_children() in a loop that keeps one CPU core busy.
    for child in children:
        child.join()
    end_time = time.time()
    print('耗时:%s' % (end_time - start_time))
# Test result:
5.线程池
import hashlib
import os
import threading

import requests
import threadpool


def down_load_file(url):
    """Download ``url`` and save the body as ``img/<md5 of url>.jpg``.

    Args:
        url: Address of the file to fetch.

    Raises:
        requests.RequestException: If the request times out, fails, or the
            server answers with an error status.
    """
    # A timeout keeps a stalled server from hanging the worker forever, and
    # raise_for_status() avoids saving an error page as if it were an image.
    r = requests.get(url, timeout=30)
    r.raise_for_status()
    # The file name is the MD5 hex digest of the URL.
    m = hashlib.md5(url.encode())
    file_name = m.hexdigest()
    print('正在下载%s' % file_name)
    # open() does not create missing directories, so make sure img/ exists.
    os.makedirs('img', exist_ok=True)
    with open('img/%s.jpg' % file_name, 'wb') as fw:
        fw.write(r.content)


url_list = [
    'https://timgsa.baidu.com/timg?image&quality=80&size=b9999_10000&sec=1564282263&di=9d1edb5e67c65051336a4ed2c7c5f56a&imgtype=jpg&er=1&src=http%3A%2F%2Fku.90sjimg.com%2Felement_origin_min_pic%2F18%2F03%2F27%2F1d7b7b1c20e2013963d677003e587421.jpg%21%2Ffwfh%2F804x804%2Fquality%2F90%2Funsharp%2Ftrue%2Fcompress%2Ftrue',
    'https://ss1.bdstatic.com/70cFuXSh_Q1YnxGkpoWK1HF6hhy/it/u=1693892888,2363320737&fm=26&gp=0.jpg',
    'https://timgsa.baidu.com/timg?image&quality=80&size=b9999_10000&sec=1563687763106&di=256bf12a6a322a5d161d9c430605eafa&imgtype=0&src=http%3A%2F%2Fhbimg.b0.upaiyun.com%2F02360ab0bb74f7a353a3d121d948d1e497b54dc61db49-bX40Ok_fw658',
]
# Create a pool of 10 worker threads. Per the original author's note, all 10
# are started whether or not there is enough work for them.
pool = threadpool.ThreadPool(10)
reqs = threadpool.makeRequests(down_load_file, url_list)
# Queue the requests with a plain loop; a list comprehension would build a
# throwaway list just for its side effects.
for req in reqs:
    pool.putRequest(req)
print(threading.active_count())
pool.wait()  # block until every queued request has been processed
print('测试结束')
# Test result:
6.jsonpath模块(字典快速查找key)
import jsonpath

d = {
    "stu": {
        "sex": '男',
        "house": {
            "beijing": {"四环": 5, "三环": 4},
            "上海": {"浦东": 4},
        },
    },
    "stu2": {
        "sex2": '女',
        "house": {
            "beijing": {"二环": 6, "三环": 8},
            "河北": {"涞源": 6},
        },
    },
}
# Plain indexing needs the full path spelled out, e.g. d['stu']['house']['上海'].

# '$.stu' selects the top-level key "stu".
r1 = jsonpath.jsonpath(d, '$.stu')
# '$.beijing' looks for "beijing" at the top level only; there is none, and
# the sample output below shows False for this query.
r2 = jsonpath.jsonpath(d, '$.beijing')
# '$..beijing' searches every level and collects all values stored under "beijing".
result = jsonpath.jsonpath(d, '$..beijing')
for found in (r1, r2, result):
    print(found)
# Test result:
# [{'sex': '男', 'house': {'beijing': {'四环': 5, '三环': 4}, '上海': {'浦东': 4}}}]
# False
# [{'四环': 5, '三环': 4}, {'二环': 6, '三环': 8}]
7.faker模块(需要参数化的参数可以用该模块快速实现参数化)
import faker

f = faker.Faker(locale='zh-CN')
# List every attribute of the generator to see which fake-data methods exist.
print(dir(f))
# Call a handful of providers by name, in the order of the sample output below.
for provider in ('name', 'user_name', 'md5', 'hostname', 'year'):
    print(getattr(f, provider)())
# Test result:
# ['_Generator__config', '_Generator__format_token', '_Generator__random',
# '__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__',
# '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__',
# '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__',
# '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__',
# '__str__', '__subclasshook__', '__weakref__', 'add_provider', 'address',
# 'am_pm', 'ascii_company_email', 'ascii_email', 'ascii_free_email',
# 'ascii_safe_email', 'bank_country', 'bban', 'binary', 'boolean', 'bothify',
# 'bs', 'building_number', 'catch_phrase', 'century', 'chrome', 'city',
# 'city_name', 'city_suffix', 'color_name', 'company', 'company_email',
# 'company_prefix', 'company_suffix', 'coordinate', 'country', 'country_code',
# 'credit_card_expire', 'credit_card_full', 'credit_card_number',
# 'credit_card_provider', 'credit_card_security_code', 'cryptocurrency',
# 'cryptocurrency_code', 'cryptocurrency_name', 'currency', 'currency_code',
# 'currency_name', 'date', 'date_between', 'date_between_dates', 'date_object',
# 'date_of_birth', 'date_this_century', 'date_this_decade', 'date_this_month',
# 'date_this_year', 'date_time', 'date_time_ad', 'date_time_between',
# 'date_time_between_dates', 'date_time_this_century', 'date_time_this_decade',
# 'date_time_this_month', 'date_time_this_year', 'day_of_month', 'day_of_week',
# 'district', 'domain_name', 'domain_word', 'ean', 'ean13', 'ean8', 'email',
# 'file_extension', 'file_name', 'file_path', 'firefox', 'first_name',
# 'first_name_female', 'first_name_male', 'first_romanized_name', 'format',
# 'free_email', 'free_email_domain', 'future_date', 'future_datetime',
# 'get_formatter', 'get_providers', 'hex_color', 'hexify', 'hostname', 'iban',
# 'image_url', 'internet_explorer', 'ipv4', 'ipv4_network_class', 'ipv4_private',
# 'ipv4_public', 'ipv6', 'isbn10', 'isbn13', 'iso8601', 'job', 'language_code',
# 'last_name', 'last_name_female', 'last_name_male', 'last_romanized_name',
# 'latitude', 'latlng', 'lexify', 'license_plate', 'linux_platform_token',
# 'linux_processor', 'local_latlng', 'locale', 'location_on_land', 'longitude',
# 'mac_address', 'mac_platform_token', 'mac_processor', 'md5', 'mime_type',
# 'month', 'month_name', 'msisdn', 'name', 'name_female', 'name_male',
# 'null_boolean', 'numerify', 'opera', 'paragraph', 'paragraphs', 'parse',
# 'password', 'past_date', 'past_datetime', 'phone_number', 'phonenumber_prefix',
# 'postcode', 'prefix', 'prefix_female', 'prefix_male', 'profile', 'provider',
# 'providers', 'province', 'pybool', 'pydecimal', 'pydict', 'pyfloat', 'pyint',
# 'pyiterable', 'pylist', 'pyset', 'pystr', 'pystruct', 'pytuple', 'random',
# 'random_choices', 'random_digit', 'random_digit_not_null',
# 'random_digit_not_null_or_empty', 'random_digit_or_empty', 'random_element',
# 'random_elements', 'random_int', 'random_letter', 'random_letters',
# 'random_lowercase_letter', 'random_number', 'random_sample',
# 'random_uppercase_letter', 'randomize_nb_elements', 'rgb_color',
# 'rgb_css_color', 'romanized_name', 'safari', 'safe_color_name', 'safe_email',
# 'safe_hex_color', 'seed', 'seed_instance', 'sentence', 'sentences',
# 'set_formatter', 'sha1', 'sha256', 'simple_profile', 'slug', 'ssn',
# 'street_address', 'street_name', 'street_suffix', 'suffix', 'suffix_female',
# 'suffix_male', 'text', 'texts', 'time', 'time_delta', 'time_object',
# 'time_series', 'timezone', 'tld', 'unix_device', 'unix_partition', 'unix_time',
# 'uri', 'uri_extension', 'uri_page', 'uri_path', 'url', 'user_agent',
# 'user_name', 'uuid4', 'windows_platform_token', 'word', 'words', 'year']
# 梁荣
# kzheng
# e6ddc4998ec48de6065546f41bd89f8b
# web-15.leili.cn
# 2016