写了一个测试网站的脚本,用多线程运行,每个线程每秒请求一次网页。因为这个脚本用到了 thread、time、urllib 这些模块,也许对其他人会有用,所以贴出来。
另外一个原因是,通过这个脚本,我发现 Python 的效率真的不错,无论是开发效率还是执行效率。之前完全没用过 Python 的 thread 和 time 模块,一边看 Python programming 一边写,也就一个小时的功夫,看上去效果还不错。
代码如下:
#!/usr/local/bin/python
#FileName = test_Web.py
#get from db
#http://192.168.1.74/spaces/posts/postdetail.aspx?id=
#put into db
#http://192.168.1.74/admin/space/post/post_add.aspx?fid=0&um=300372&v=__VERSION__&title=0&content=hahaasdfasdf
import thread, time, urllib
# --- Shared state for the load test (read and updated by the worker threads) ---

# Post id that the workers pass to opengeturl().
id_index = 100000
# Request budget: workers stop once the shared counter passes this value.
id_count = 20000
# End of the id range; computed here but not referenced by the visible code.
id_max = id_index + id_count

# i_cnt counts requests started so far; bStop flips to True once the
# summary line has been printed.
i_cnt, bStop = 0, False

# Wall-clock start of the run, used to compute the elapsed time.
time_begin = time.time()
def openurl():
    """Request the hard-coded ``post_add.aspx`` URL once and discard the body.

    The file header labels this URL "put into db". The response is read in
    full so the whole transfer takes place, then thrown away.

    Returns None. Errors raised by ``urllib.urlopen`` or ``read`` propagate
    to the caller.
    """
    sock = urllib.urlopen("http://192.168.1.74/admin/space/post/post_add.aspx?fid=0&um=300372&v=__VERSION__&title=0&content=hahaasdfasdf")
    try:
        # Drain the response; its content is not needed.
        sock.read()
    finally:
        # Release the connection even when read() fails.
        sock.close()
def opengeturl(id):
    """Fetch the ``postdetail.aspx`` page for one post id and discard the body.

    The requested URL is printed before the fetch so progress is visible on
    stdout.

    Args:
        id: Integer post id, formatted into the query string with ``%d``.

    Returns None. Errors raised by ``urllib.urlopen`` or ``read`` propagate
    to the caller.
    """
    strUrl = "http://192.168.1.74/spaces/posts/postdetail.aspx?id=%d" % id
    print(strUrl)
    sock = urllib.urlopen(strUrl)
    try:
        # Drain the response; its content is not needed.
        sock.read()
    finally:
        # Release the connection even when read() fails.
        sock.close()
# Serialises access to i_cnt and bStop, which every worker thread updates.
_state_lock = thread.allocate_lock()


def child(myID):  # this function runs in threads
    """Worker loop: fetch the page for ``id_index`` about once per second.

    Each pass claims one unit of the shared request budget by incrementing
    ``i_cnt``, fetches the page via ``opengeturl`` and sleeps one second.
    Once ``id_count`` requests have been claimed the worker exits; the first
    worker to notice prints the elapsed-time summary and sets ``bStop``.

    Args:
        myID: Integer identifying this worker; used only in progress output.

    Returns None. Errors raised by ``opengeturl`` are not caught here.
    """
    global i_cnt, bStop

    while True:
        # Claim a request, or detect completion, under the lock so that
        # concurrent workers cannot lose increments or both decide to
        # print the summary.
        _state_lock.acquire()
        try:
            finished = i_cnt >= id_count
            if finished:
                should_report = not bStop
                bStop = True
            else:
                i_cnt = i_cnt + 1
        finally:
            _state_lock.release()

        if finished:
            if should_report:
                i_handle_time = time.time() - time_begin
                print("%d rows cost %f second" % (id_count, i_handle_time))
            break

        # id_index is deliberately left unchanged (the increment was
        # commented out in the original), so every request hits the same id.
        print("[%d] ==> %d" % (myID, id_index))
        opengeturl(id_index)
        time.sleep(1)
# Spawn 500 worker threads; each receives its index as its worker id.
for i in range(500):
    thread.start_new_thread(child, (i,))

# Keep the main thread alive while the workers run: the thread module has
# no join, and its documentation leaves it system-defined whether other
# threads survive once the main thread exits. Stop waiting as soon as a
# worker has reported completion (bStop), or after the original upper
# bound of 1,000,000 seconds. Workers still mid-request at that moment
# are not awaited.
_deadline = time.time() + 1000000
while not bStop and time.time() < _deadline:
    time.sleep(1)
print('Main thread exiting.')