import threading
class MyThread(threading.Thread):
    """Minimal Thread subclass that only forwards a custom thread name."""

    def __init__(self, threadname):
        # Hand the name to the base class; it is then readable via
        # getName() / the .name attribute.
        super(MyThread, self).__init__(name=threadname)
import threading,time
class MyThread(threading.Thread):
def __init__(self,threadname):
threading.Thread.__init__(self,name=threadname)
def run(self):
'''
run 方法
'''
for i in xrange(10):
print self.getName,i
time.sleep(1)
# A freshly constructed thread is merely "born"; run() does not execute
# until start() is called on it.
my = MyThread('test')
一个线程对象被创建后,它就处于“born”(诞生)状态。
如何让这个线程对象开始运行呢?只要调用线程对象的
start() 方法即可。
import threading,time
class MyThread(threading.Thread):
def __init__(self,threadname):
threading.Thread.__init__(self,name=threadname)
def run(self):
for i in xrange(10):
print self.getName(),i
time.sleep(1)
# Construct the thread, then start() it: this moves the thread from
# "born" to "runnable" and eventually invokes run() concurrently.
my = MyThread('test')
my.start()
现在线程就处于“ready”状态或者也称为“runnable”状态。
# Daemon-flag demo.  setDaemon(True) MUST be called before start();
# a daemon thread is terminated as soon as the main thread exits, so
# this thread will likely not finish all ten of its prints.
t1 = MyThread('t1')
print t1.getName(),t1.isDaemon()
t1.setDaemon(True)
print t1.getName(),t1.isDaemon()
t1.start()
print 'main thread exit'
import Queue
# Queue.Queue is a thread-safe FIFO; maxsize caps how many items it holds.
myqueue = Queue.Queue(maxsize = 10)
# put() appends an item (blocks when the queue is full, by default).
myqueue.put(10)
# get() removes and returns the head item (blocks when empty, by default).
myqueue.get()
调用队列对象的get()方法从队头删除并返回一个项目。可选参数为block,默认为1。如果队列为空且block为1,get()就使调用线程暂停,直至有项目可用。如果block为0,队列将引发Empty异常。
#coding:utf-8
'''
今天我们来学习一下python里的多线程问题,并用一个多线程爬虫程序来实验。
@author FC_LAMP
有几点要说明一下:
1) 线程之间共享状态、内存、资源,并且它们相互间易于通信。
'''
import threading,urllib2
import datetime,time
import Queue
# Target URLs that the worker threads will fetch.
hosts = ['http://www.baidu.com','http://news.163.com/','http://weibo.com/u/2043979614','http://fc-lamp.blog.163.com']
class ThreadClass(threading.Thread):
def __init__(self,queue):
threading.Thread.__init__(self)
self.queue = queue
def run(self):
'''
run 方法用于要执行的功能
'''
#getName()用于获取线程名称
while True:
#从队列中获取一个任务
host = self.queue.get()
#抓取工作
url = urllib2.urlopen(host)
print url.read(500)
#标记队列工作已完成
self.queue.task_done()
def main():
    """Spin up one worker per host, enqueue all hosts, wait for completion."""
    # Shared task queue.
    task_queue = Queue.Queue()
    # One daemon worker per target URL; daemon threads are killed
    # automatically when the main program exits.
    for _ in range(len(hosts)):
        worker = ThreadClass(task_queue)
        worker.setDaemon(True)
        worker.start()
    # Feed every work item into the queue.
    for host in hosts:
        task_queue.put(host)
    # Block until every queued item has been marked done.
    task_queue.join()
if __name__=='__main__':
    st = time.time()
    main()
    # Report total elapsed wall-clock time in seconds.
    print '%f'%(time.time()-st)
#coding:utf-8
import socket
import sys
import time
import Queue
import threading
# Server endpoint: listen on localhost:8000.
host = 'localhost'
port = 8000
# Create a TCP/IPv4 socket object.
s = socket.socket(socket.AF_INET,socket.SOCK_STREAM)
# Bind to the given address and port; abort if the port is unavailable.
try:
    s.bind((host,port))
except Exception as e :
    print 'Bind Failed:%s'%(str(e))
    sys.exit()
print 'Socket bind complete!!'
# Start listening for connections.
s.listen(10) # backlog: at most 10 pending connections
# Queue of accepted client connections handed off to the worker thread.
queue = Queue.Queue()
#创建线程
# Worker thread class.
class TaskThread(threading.Thread):
    """Serve client sockets taken from the module-level connection queue.

    Fixes over the original:
    - queue.task_done() is now called per connection, so the script's
      queue.join() can actually return;
    - the client socket is closed after use instead of being leaked
      (the original left close() commented out);
    - the greeting typo 'welecome' is corrected.
    """

    def __init__(self):
        threading.Thread.__init__(self)

    def run(self):
        while 1:
            # Block until an accepted connection is queued.
            conn = queue.get()
            try:
                conn.send('welcome.....')
                # Receive up to 1 KiB from the client and echo it back.
                client_data = conn.recv(1024)
                conn.sendall(client_data)
            finally:
                # Release the client socket and mark the task complete.
                conn.close()
                queue.task_done()
#接受连接
while 1:
#将连接放入队列
conn,addr = s.accept()
print 'Connected from %s:%s'%(addr[0],str(addr[1]))
queue.put(conn)
#生成线程池
th = TaskThread()
th.setDaemon(True)
th.start()
queue.join()
s.close()
#coding:utf-8
'''
@author:FC_LAMP
'''
import urllib2,urllib,socket
import os,re,threading,Queue
import cookielib,time,Image as image
import StringIO
# 30-second default timeout applied to all socket operations (incl. urlopen).
socket.setdefaulttimeout(30)
#详情页
class spiderDetailThread(threading.Thread):
header = {
'User-Agent':'Mozilla/5.0 (Windows NT 5.1; rv:6.0.2) Gecko/20100101 Firefox/6.0.2',
'Referer':'http://www.xxx.com' #这里是某图片网站
}
dir_path = 'D:/test/'
def __init__(self,queue):
threading.Thread.__init__(self)
cookie = cookielib.CookieJar()
cookieproc = urllib2.HTTPCookieProcessor(cookie)
urllib2.install_opener(urllib2.build_opener(cookieproc))
self.queue = queue
self.dir_path = dir_address
def run(self):
while True:
urls = self.queue.get()
for url in urls:
res = urllib2.urlopen(urllib2.Request(url=url,headers=self.header)).read()
patt = re.compile(r'([^<]+)<\/title>',re.I)
patt = patt.search(res)
if patt==None:
continue
#获取TITLE
title = patt.group(1).split('_')[0]#'abc/\\:*?"<>|'
for i in ['\\','/',':','*','?','"',"'",'<','>','|']:
title=title.replace(i,'')
title = unicode(title,'utf-8').encode('gbk')
print title
#获取图片
cid = url.split('/')[-1].split('c')[-1].split('.')[0]
patt = re.compile(r'new\s+Array\(".*?]+>(.*?)<\/div>"\)',re.I|re.S)
patt =patt.search(res)
if not patt:
continue
patt = patt.group(1)
src_patt = re.compile(r'.*?src=\'(.*?)\'.*?',re.I|re.S)
src_patt = src_patt.findall(patt)
if not src_patt:
continue
#创建目录
try:
path = os.path.join(self.dir_path,title)
if not os.path.exists(path):
os.makedirs(path)
except Exception as e:
pass
if not os.path.exists(path):
continue
for src in src_patt:
name = src.split('/')[-1]
#小图
s_path = os.path.join(path,name)
img = urllib2.urlopen(src).read()
im = image.open(StringIO.StringIO(img))
im.save(s_path)
#中图
src = src.replace('_s.','_r.')
name = src.split('/')[-1]
m_path = os.path.join(path,name)
img = urllib2.urlopen(src).read()
im = image.open(StringIO.StringIO(img))
im.save(m_path)
#大图
src = src.replace('smallcase','case')
src = src.replace('_r.','.')
name = src.split('/')[-1]
b_path = os.path.join(path,name)
img = urllib2.urlopen(src).read()
im = image.open(StringIO.StringIO(img))
im.save(b_path)
self.queue.task_done()
#例表页
class spiderlistThread(threading.Thread):
header = {
'User-Agent':'Mozilla/5.0 (Windows NT 5.1; rv:6.0.2) Gecko/20100101 Firefox/6.0.2',
'Referer':'http://www.xxx.com' #这里某图片网站
}
def __init__(self,queue,url):
threading.Thread.__init__(self)
cookie = cookielib.CookieJar()
cookieproc = urllib2.HTTPCookieProcessor(cookie)
urllib2.install_opener(urllib2.build_opener(cookieproc))
self.queue = queue
self.url = url
def run(self):
i = 1
while 1:
url = '%slist0-%d.html'%(self.url,i)
res = urllib2.urlopen(urllib2.Request(url=url,headers=self.header)).read()
patt = re.compile(r']+>(.*?)<\/ul>',re.I|re.S)
patt = patt.search(res)
if not patt:
break
else:
res = patt.group(1)
patt = re.compile(r'