Python开发Http代理服务器 - socketref,呆在autonavi.com - C++博客
Python开发Http代理服务器 - socketref,呆在autonavi.com - C++博客
Python开发Http代理服务器

这是之前开发酒店广告投放系统时编写的Http代理服务程序,其功能是把广告插播进Html DOM结构内。一般被插入的是DIV,当然也包括script,这些内容都可以从数据库中动态获得。
简单修改之后即可当做Http代理服务器程序使用:在浏览器中把Http代理设置为运行该程序的机器的Ip即可,只要运行代理程序的机器能上网,客户机便能上网(其中涉及Page gzip的处理有点麻烦)。
# -*- coding:utf-8 -*-
# HTTP proxy server
# 1. IP restriction, MAC restriction
#
# [email protected]
# www.sw2us.com

# NOTE(review): this looks like the sh/python polyglot launcher trick, which
# only has an effect when the file starts with a `#!/bin/sh` line; to Python
# it is just a string expression with no effect - confirm whether it is needed.
"exec" "python" "-O" "$0" "$@"

__doc__ = """sw2us HTTP Proxy.

"""

# Appended to the handler's `server_version` string.
__version__ = "0.2.1"
15
import BaseHTTPServer
import SocketServer
import StringIO
import gzip
import httplib
import mimetools
import os
import re
import select
import socket
import sys
import time
import traceback
import urlparse
import zlib
19
20
class ConfigProperty:
    """A single ``key=value`` entry parsed from one configuration line."""

    def __init__(self, owner):
        # `owner` is accepted for interface compatibility; it is not stored.
        self.key = ''
        self.value = ''

    def create(self, text):
        """Parse one ``key=value`` line into this property.

        Everything from the first ``#`` onwards is treated as a comment and
        dropped. The line is split on the FIRST ``=`` only, so values may
        themselves contain ``=`` (for example URLs with a query string).

        @param text: raw line from the configuration file
        @return: True when a non-empty key was parsed, False otherwise
        """
        text = text.split('#', 1)[0]
        key, sep, value = text.partition('=')
        key = key.strip()
        if not sep or not key:
            # No '=' at all (blank/comment line) or nothing before it.
            return False
        self.key = key
        self.value = value.strip()
        return True

    def toString(self):
        """Return the property formatted as ``key=value`` ('' on failure)."""
        try:
            return "%s=%s" % (self.key, self.value)
        except (TypeError, ValueError, UnicodeError):
            return ''

    def toInt(self):
        """Return the value as an int, or 0 when it is not a valid integer."""
        try:
            return int(self.value)
        except (TypeError, ValueError):
            return 0

    def toFloat(self):
        """Return the value as a float, or 0.0 when it is not a valid number."""
        try:
            return float(self.value)
        except (TypeError, ValueError):
            return 0.0
66
67
# @def SimpleConfig
# Simple configuration file; basic line format: key=value
class SimpleConfig:
    """In-memory view of a ``key=value`` configuration file."""

    def __init__(self):
        self._file = ''
        self._props = []
        self._strip = True

    def open(self, file, strip=True):
        """Load properties from a configuration file.

        Any previously loaded properties are discarded first. Lines that do
        not parse as a property are skipped.

        @param file: path of the configuration file
        @param strip: stored on the instance as `_strip` (whether to trim
                      invisible leading/trailing characters)
        @return: True on success, False when the file cannot be read
        """
        self._file = file
        self._strip = strip
        self._props = []
        try:
            # `with` guarantees the handle is closed even if reading fails.
            with open(file, 'r') as fh:
                for text in fh:
                    prop = ConfigProperty(self)
                    if prop.create(text):
                        self._props.append(prop)
        except EnvironmentError:
            return False
        return True

    def toString(self):
        """Return all properties as text, one ``key=value`` per line."""
        return ''.join(p.toString() + "\n" for p in self._props)

    def saveAs(self, file):
        """Write the configuration to a file.

        @param file: destination path
        @return: True on success, False when the file cannot be written
        """
        try:
            with open(file, 'w') as fh:
                # Must go through self: a bare toString() is a NameError.
                fh.write(self.toString())
        except EnvironmentError:
            print("write File Failed!")
            return False
        return True

    def getProperty(self, name):
        """Return the first property whose key equals `name`, or None."""
        for p in self._props:
            if p.key == name:
                return p
        return None

    def getPropertyValue(self, key, default=''):
        """Return the string value stored under `key`, or `default`."""
        prop = self.getProperty(key)
        if prop is None:
            return default
        return prop.value

    def getPropertyValueAsInt(self, name, default=0):
        """Return the value under `name` as an int.

        `default` is returned when the key is missing, empty or not an
        integer.
        """
        text = self.getPropertyValue(name)
        if not text:
            return default
        try:
            return int(text)
        except (TypeError, ValueError):
            return default

    def getPropertyValueAsFloat(self, name, default=0.0):
        """Return the value under `name` as a float.

        `default` is returned when the key is missing, empty or not a
        number.
        """
        text = self.getPropertyValue(name)
        if not text:
            return default
        try:
            # Convert the stored text, not the default.
            return float(text)
        except (TypeError, ValueError):
            return default
151
152
153 # ===========================================#
154
155
156 # ===========================================#
157
def getMacList(lines=None):
    """Return (ip, mac) pairs parsed from ARP table output.

    A line matches when, after stripping, it starts with a dotted IPv4
    address followed by whitespace and a MAC address written as six
    dash-separated hex groups (e.g. ``00-1a-2b-3c-4d-5e``). All other
    lines are ignored.

    @param lines: optional iterable of text lines to parse; when None the
                  output of the ``arp -a`` command is read instead
    @return: list of (ip, mac) string tuples, in input order
    """
    pattern = re.compile(
        r'^(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})\s+'
        r'([0-9a-fA-F]{1,2}(?:-[0-9a-fA-F]{1,2}){5}).*')

    if lines is None:
        pipe = os.popen('arp -a', 'r')
        try:
            lines = pipe.readlines()
        finally:
            # Always release the pipe to the child process.
            pipe.close()

    maclist = []
    for line in lines:
        rst = pattern.match(line.strip())
        if rst:
            maclist.append(rst.groups())
    return maclist
173
174
175
# #########################################
# Module-level configuration, loaded from 'proxy.conf' (relative to the
# current working directory) when the module is imported. The result of
# open() is not checked, so a missing file leaves `confile` empty.
confile = SimpleConfig()
confile.open('proxy.conf')
dbconn = None
180
# #########################################
# Initialise the system configuration
def initConfiguration():
    """Initialise system configuration; currently a stub returning True."""
    return True
187
188 # #########################################
189
class ProxyHandler(BaseHTTPServer.BaseHTTPRequestHandler):
    """Request handler that relays HTTP and CONNECT requests upstream.

    If an `allowed_clients` attribute is set on the handler (a container of
    IP address strings), requests from any other address are answered with
    403 instead of being proxied.
    """

    __base = BaseHTTPServer.BaseHTTPRequestHandler
    __base_handle = __base.handle
    server_version = "TinyHTTPProxy/" + __version__
    rbufsize = 0  # keep self.rfile unbuffered

    def handle(self):
        """Entry point for a new client connection (runs in its own thread)."""
        print('client incoming')
        (ip, port) = self.client_address
        if hasattr(self, 'allowed_clients') and ip not in self.allowed_clients:
            # Still read and parse the request so a proper 403 can be sent.
            self.raw_requestline = self.rfile.readline()
            if self.parse_request():
                self.send_error(403)
        else:
            self.__base_handle()

    def _connect_to(self, netloc, soc):
        """Connect `soc` to the ``host[:port]`` given in `netloc`.

        The port defaults to 80. An error response is sent to the client
        when the port is malformed (400) or the connection fails (404).

        @return: 1 on success, 0 on failure
        """
        i = netloc.find(':')
        if i >= 0:
            try:
                host_port = netloc[:i], int(netloc[i + 1:])
            except ValueError:
                self.send_error(400, "bad port in %s" % netloc)
                return 0
        else:
            host_port = netloc, 80
        try:
            soc.connect(host_port)
        except socket.error as arg:
            # Prefer the human-readable part of (errno, message).
            try:
                msg = arg.args[1]
            except IndexError:
                msg = str(arg)
            self.send_error(404, msg)
            return 0
        return 1

    def do_CONNECT(self):
        """Handle CONNECT: open a tunnel to ``self.path`` and relay bytes."""
        soc = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            if self._connect_to(self.path, soc):
                self.log_request(200)
                self.wfile.write(self.protocol_version +
                                 " 200 Connection established\r\n")
                self.wfile.write("Proxy-agent: %s\r\n" % self.version_string())
                self.wfile.write("\r\n")
                self._read_write(soc, 300)
        finally:
            print("\t" "bye")
            soc.close()
            self.connection.close()

    def do_GET(self):
        """Forward the request to the origin server and relay the response.

        Also bound to HEAD, POST, PUT and DELETE. Only ``http`` URLs without
        a fragment are accepted; anything else is answered with 400.
        """
        (scm, netloc, path, params, query, fragment) = urlparse.urlparse(
            self.path, 'http')
        if not netloc:
            netloc = self.headers.get('Host', "")
        if scm != 'http' or fragment or not netloc:
            self.send_error(400, "bad url %s" % self.path)
            return
        soc = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            if self._connect_to(netloc, soc):
                self.log_request()
                self.headers['Connection'] = 'close'
                del self.headers['Proxy-Connection']
                request = ["%s %s %s\r\n" % (
                    self.command,
                    urlparse.urlunparse(('', '', path, params, query, '')),
                    self.request_version)]
                request.extend("%s: %s\r\n" % key_val
                               for key_val in self.headers.items())
                request.append("\r\n")
                # sendall: a plain send() may transmit only part of the data.
                soc.sendall("".join(request))
                # Request line and headers are sent; relay everything else.
                self._read_write(soc)
        finally:
            print("\t" "bye")
            soc.close()
            self.connection.close()

    def insertTags(self, tag, body, insert):
        """Insert `insert` right after the first opening `tag` in `body`.

        The first occurrence of ``<tag`` is located, and `insert` is placed
        directly after the next ``>``. `body` is returned unchanged when the
        tag, or a ``>`` after it, is not found.
        """
        p1 = body.find('<%s' % tag)
        if p1 != -1:
            p2 = body.find('>', p1)
            if p2 != -1:
                print('*-' * 20)
                body = body[:p2 + 1] + insert + body[p2 + 1:]
        return body

    def _dechunk(self, body):
        """Return the payload of a ``Transfer-Encoding: chunked`` body.

        Raises ValueError when a chunk-size line is missing or is not
        hexadecimal.
        """
        pieces = []
        pos = 0
        while pos < len(body):
            eol = body.find('\r\n', pos)
            if eol == -1:
                raise ValueError("malformed chunked body")
            # Ignore optional chunk extensions after ';'.
            size = int(body[pos:eol].split(';', 1)[0], 16)
            if size == 0:
                break
            start = eol + 2
            pieces.append(body[start:start + size])
            pos = start + size + 2  # skip the CRLF that ends the chunk
        return ''.join(pieces)

    # Responses (e.g. from Google) are often gzip-compressed, either directly
    # after the headers or in chunked form, so the body has to be decompressed
    # before text can be inserted; Content-Length is then recomputed.
    def sendBackResponse(self, command, headers, body):
        """Send a (possibly rewritten) response back to the client.

        A gzip-encoded body is de-chunked if necessary and decompressed, and
        the matching headers are removed, so the client receives plain data.
        If decompression fails, body and headers are left as received. When
        the handler provides a `publish_advertisement(body)` method it is
        given the chance to rewrite the body. ``Content-Length`` is always
        recomputed.

        @param command: status line, without the trailing CRLF
        @param headers: mapping of header name to value
        @param body: raw response body
        """
        encoding = headers.get('content-encoding', '')
        if encoding.strip().lower() == 'gzip':
            try:
                payload = body
                chunked = headers.get('transfer-encoding', '')
                is_chunked = chunked.strip().lower() == 'chunked'
                if is_chunked:
                    payload = self._dechunk(payload)
                gzipper = gzip.GzipFile(fileobj=StringIO.StringIO(payload))
                payload = gzipper.read()
            except Exception:
                traceback.print_exc()
                print('decompress failed!')
            else:
                # Only drop the encoding headers once decoding succeeded.
                body = payload
                del headers['content-encoding']
                if is_chunked:
                    del headers['transfer-encoding']

        # Insert the configured advertisement, if a publisher is available.
        publish = getattr(self, 'publish_advertisement', None)
        if publish is not None:
            try:
                body = publish(body)
            except Exception:
                # Best effort: send the unmodified body.
                traceback.print_exc()

        # Avoid two length headers that differ only in case.
        for name in list(headers.keys()):
            if name.lower() == 'content-length':
                del headers[name]
        headers['Content-Length'] = str(len(body))

        head = [command, '\r\n']
        head.extend("%s: %s\r\n" % (k, v) for k, v in headers.items())
        head.append("\r\n")
        self.connection.sendall(''.join(head))
        self.connection.sendall(body)

    # ----------------------------------------------------

    def _read_write(self, soc, max_idling=20):
        """Relay bytes between the client connection and `soc`.

        Returns when either side closes, when select() reports an
        exceptional condition, or after `max_idling` consecutive 3-second
        waits without data (default 20 * 3 = 60 seconds).

        @param soc: socket connected to the upstream server
        @param max_idling: number of idle select() rounds before giving up
        """
        # self.connection - the client; soc - the upstream connection
        iw = [self.connection, soc]
        ow = []
        count = 0
        while 1:
            count += 1
            (ins, _, exs) = select.select(iw, ow, iw, 3)
            if exs:
                print('error occr!')
                break
            for i in ins:
                if i is soc:
                    out = self.connection
                else:
                    out = soc
                data = i.recv(8192)
                if not data:
                    # Peer closed the connection.
                    return
                # sendall: a plain send() may forward only part of the data.
                out.sendall(data)
                count = 0
            if count == max_idling:
                print('idling exit')
                break

    do_HEAD = do_GET
    do_POST = do_GET
    do_PUT = do_GET
    do_DELETE = do_GET
459
class ThreadingHTTPServer(SocketServer.ThreadingMixIn,
                          BaseHTTPServer.HTTPServer):
    """HTTPServer combined with ThreadingMixIn."""
    pass
462
463
464
465
def serving(HandlerClass,
            ServerClass, protocol="HTTP/1.0"):
    """Start the proxy and serve requests until interrupted.

    The process exits immediately unless the first command-line argument is
    'www.sw2us.com'. The listening port is taken from the second
    command-line argument when given, otherwise from the 'httpport'
    configuration property (default 8000).

    @param HandlerClass: request handler class
    @param ServerClass: server class, constructed as
                        ServerClass((host, port), HandlerClass)
    @param protocol: value assigned to HandlerClass.protocol_version
    """
    if len(sys.argv) < 2 or sys.argv[1] != 'www.sw2us.com':
        sys.exit()

    if sys.argv[2:]:
        port = int(sys.argv[2])
    else:
        port = confile.getPropertyValueAsInt('httpport', 8000)

    server_address = ('', port)

    HandlerClass.protocol_version = protocol
    httpd = ServerClass(server_address, HandlerClass)

    sa = httpd.socket.getsockname()
    print("www.sw2us.com@2010 v.1.0.0")
    print("Serving HTTP on %s port %s " % (sa[0], sa[1]))

    # Silence per-request output while serving. A discarding sink is used
    # instead of an in-memory buffer, which would grow for as long as the
    # server runs.
    saved = (sys.stdout, sys.stderr)
    sink = open(os.devnull, 'w')
    sys.stdout = sink
    sys.stderr = sink
    try:
        httpd.serve_forever()
    finally:
        sys.stdout, sys.stderr = saved
        sink.close()
492
493
494
if __name__ == '__main__':
    # Record the process id in 'proxy.pid'.
    with open('proxy.pid', 'w') as f:
        f.write(str(os.getpid()))

    try:
        # 'allowed_clients' is a comma-separated list of host names or IPs;
        # when it yields at least one address, only those clients are served.
        raw = confile.getPropertyValue('allowed_clients')
        names = [h.strip() for h in raw.split(',')]
        allowed = [socket.gethostbyname(h) for h in names if h]
        if allowed:
            ProxyHandler.allowed_clients = allowed

        # Module-level buffer available as a sink for redirected output.
        buff = StringIO.StringIO()

        serving(ProxyHandler, ThreadingHTTPServer)
    except (KeyboardInterrupt, SystemExit):
        # Normal ways to stop the proxy; exit quietly.
        pass
    except Exception:
        # Report startup/runtime failures instead of hiding them. The real
        # stderr is used because sys.stderr may have been redirected.
        traceback.print_exc(file=sys.__stderr__)
posted on
2012-04-30 08:43 lexus 阅读(
...) 评论(
...) 编辑 收藏