python网络编程学习笔记(一)

python网络编程学习笔记(一)

python网络编程基础,第四版
pycharm实现,python版本2.7.5

第一部分 底层网络

一、笔记

#coding=utf-8
第一章

import socket,sys
port=70  # Gopher's well-known TCP port
# Echo the argument count and every command-line argument.
print (len(sys.argv))
for arg in sys.argv:
    print (arg)
host=sys.argv[1]      # server to contact
filename=sys.argv[2]  # Gopher selector to request

s=socket.socket(socket.AF_INET,socket.SOCK_STREAM)
try:
    print ("old s is:",s)
    print (host)
    s.connect((host,port))
    print ("new s is:",s)

    # A Gopher request is the selector followed by CRLF.
    s.sendall(filename+"\r\n")

    # Copy the reply to stdout until recv() returns an empty string,
    # which means the server closed the connection.
    while 1:
        buf=s.recv(2048)
        if not buf:
            break
        sys.stdout.write(buf)
finally:
    # Release the socket even when connecting or reading fails.
    s.close()

加入错误处理

import socket,sys
port=70  # Gopher's well-known TCP port
# Echo the argument count and every command-line argument.
print (len(sys.argv))
for arg in sys.argv:
    print (arg)
host=sys.argv[1]
filename=sys.argv[2]

s=socket.socket(socket.AF_INET,socket.SOCK_STREAM)
print ("old s is:",s)
print (host)

try:
    s.connect((host, port))
except socket.error as e:
    # socket.error is the base class of socket.gaierror, so this reports
    # refused or unreachable connections as well as name-resolution failures.
    print ("ERROR connection to server:%s" %e)
    s.close()
    sys.exit(1)

try:
    # A Gopher request is the selector followed by CRLF.
    s.sendall(filename+"\r\n")

    # Copy the reply to stdout until the server closes the connection.
    while 1:
        buf=s.recv(2048)
        if not buf:
            break
        sys.stdout.write(buf)
finally:
    s.close()

文件接口类重写
import socket,sys
port=70  # TCP port the client connects to
host=sys.argv[1]      # server name, first command-line argument
filename=sys.argv[2]  # text sent as the request, second argument

s=socket.socket(socket.AF_INET,socket.SOCK_STREAM)
s.connect((host,port))
# Wrap the socket in a read/write file object; the second argument (0) is
# the buffer size passed to makefile().
fd=s.makefile('rw',0)
fd.write(filename+"\r\n")


# readlines() gathers the whole reply; each line is then copied to stdout.
for line in fd.readlines():
    sys.stdout.write(line)

基本服务器操作

import socket

host=''   # empty host string passed to bind()
port=80

s=socket.socket(socket.AF_INET,socket.SOCK_STREAM)
# Let the port be re-bound right after the server is restarted.
s.setsockopt(socket.SOL_SOCKET,socket.SO_REUSEADDR,1)
s.bind((host,port))
s.listen(1)

# Adjacent literals instead of a backslash continuation inside the string,
# so source indentation no longer leaks into the printed banner.
print ("Server is running on port %d; press ctrl-c to "
       "terminate." % port)

while 1:
    clientsock,clientaddr=s.accept()
    clientfile=clientsock.makefile('rw',0)
    try:
        # "\n" (previously the literal characters "/n") ends the greeting.
        clientfile.write("welcome,"+str(clientaddr)+"\n")
        clientfile.write("Please enter a string:")
        line=clientfile.readline().strip()
        clientfile.write("You entered %d characters.\n"%len(line))
    finally:
        # Close both objects even if the client disconnects mid-dialogue.
        clientfile.close()
        clientsock.close()

高级接口

# Example 1: fetch a Gopher selector through gopherlib.
import gopherlib,sys
host=sys.argv[1]
file=sys.argv[2]

# send_selector() returns a file-like object holding the reply.
f=gopherlib.send_selector(file,host)
for line in f.readlines():
    sys.stdout.write(line)

# Example 2: the same request expressed as a gopher:// URL for urllib.
import urllib,sys
host=sys.argv[1]
file=sys.argv[2]

f=urllib.urlopen('gopher://%s%s'%(host,file))
for line in f.readlines():
    sys.stdout.write(line)

# Example 3: fetch the URL given on the command line, 2048 bytes at a time.
import urllib,sys
f=urllib.urlopen(sys.argv[1])
while 1:
    buf=f.read(2048)
    # An empty read means end of data.
    if not len(buf):
        break
    sys.stdout.write(buf)

第二章
使用udp

第三章 网络服务器

import socket
solist=[x for x in dir(socket) if x.startswith('SO')]
solist.sort()
for x in solist:
    print x

第四章 域名系统

import sys,socket

result=socket.getaddrinfo(sys.argv[1],None)
print result[0][4]

import sys,socket

result=socket.getaddrinfo(sys.argv[1],None)
counter=0
for item in result:
    print "%-2d:%s"%(counter,item[4])
    counter+=1

P70
import sys,socket
result=socket.getaddrinfo(sys.argv[1],None,0,socket.SOCK_STREAM)
counter=0
for item in result:
    print "%-2d:%s"%(counter,item[4])
    counter+=1

执行反向查询

import sys,socket

try:
    result=socket.gethostbyaddr(sys.argv[1])

    print "Primary hostname:"
    print " "+result[0]

    print "\nAddresses:"
    for item in result[2]:
        print " "+item

except socket.herror,e:
    print "Couldn't look up name:",e

正反向查询

import sys,socket
def getipaddrs(hostname):
     result=socket.getaddrinfo(hostname,None,0,socket.SOCK_STREAM)
     return [x[4][0] for x in result]
def gethostname(ipaddr):
    return socket.gethostbyaddr(ipaddr)[0]

try:
    hostname=gethostname(sys.argv[1])
    ipaddrs=getipaddrs(hostname)
except socket.herror,e:
    print "NO host names available for %s;it may be normal"%sys.argv[1]
    sys.exit(0)
except socket.gaierror,e:
    print "Got hostname %s,but it could not be forward-resolved:%s"%(hostname,str(e))
    sys.exit(1)

if not sys.argv[1] in ipaddrs:
    print "GOt hostnae %s,but no forward lookup,"% hostname
    print "original IP %s did not appear in IP address list"% sys.argv[1]
    sys.exit(1)

print "Validated hostname:",hostname

获得完整域名:gethostname()获得主机名,getfqdn()获得完整域名,getaddrinfo()获得该域名对应的IP地址。

import sys,socket

def getipaddrs(hostname):
    result=socket.getaddrinfo(hostname,None,0,socket.SOCK_STREAM)
    return [x[4][0] for x in result]

hostname=socket.gethostname()
print "Host name:",hostname

print "Fully-qualified name:",socket.getfqdn(hostname)
try:
    print "IP addresses:",",".join(getipaddrs(hostname))
except socket.gaierror,e:
    print "Couldn't not get IP addresses:",e

import sys,DNS
query=sys.argv[1]
DNS.DiscoverNameServers()

reqobj=DNS.Request()

answerobj=reqobj.req(name=query,qtrpe=DNS.Type.ANY)
if not len(answerobj.answers):
    print "NOT found."
for item in answerobj.answers:
    print "%-5s %s"%(item['typename'],item['data'])

import sys,DNS

def hierquery(qstring,qtype):# find the records of type qtype for a host name
    """Return the 'data' of qtype answers for qstring or a parent domain.

    When the query fails or yields no matching answers, the leftmost label
    is removed and the search repeats on the remainder.  Returns None once
    no label is left to remove.
    """
    reqobj=DNS.Request()# create the query object
    try:
        answerobj=reqobj.req(name=qstring,qtype=qtype)
        answers=[x['data'] for x in answerobj.answers if x['type']==qtype]
    except DNS.Base.DNSError:
        answers=[]
    if len(answers):
        return answers
    else:
        # Split off the first label; a single piece means nothing is left.
        remainder=qstring.split(".",1)
        if len(remainder)==1:
            return None
        else:
            return hierquery(remainder[1],qtype)


def findnameservers(hostname):# get the list of authoritative name servers
    return hierquery(hostname,DNS.Type.NS)

def getrecordsfromnameserver(qstring,qtype,nslist):
    """Query the servers in *nslist* in turn and return the first answers.

    Returns the first non-empty answer list, or [] when every server in
    the list either failed or had no answer.
    """
    for ns in nslist:
        reqobj=DNS.Request(server=ns)
        try:
            answers=reqobj.req(name=qstring,qtype=qtype).answers
        except DNS.Base.DNSError:
            # This server failed; move on to the next one.
            continue
        if answers:
            return answers
    # Only reached after the whole list has been tried.  The return used to
    # sit inside the loop, so the search stopped after the first server.
    return []

def nslookup(qstring,qtype,verbose=1):
    nslist=findnameservers(qstring)
    if nslist==None:
        raise RuntimeError,"Could not find nameserver to use."
    if verbose:
        print "using nameserver:",",".join(nslist)
    return getrecordsfromnameserver(qstring,qtype,nslist)

# Script entry point: look up the name given on the command line and
# print the type name and data of every answer.
if __name__=='__main__':
    query=sys.argv[1]
    DNS.DiscoverNameServers()

    answers=nslookup(query,DNS.Type.ANY)
    if not len(answers):
        print "not found."
    for item in answers:
        print "%-5s %s"%(item['typename'],item['data'])

第五章
超时的用法
echoserver.py

import socket,traceback

host = ''     # empty host string passed to bind()
port = 51432

s = socket.socket(socket.AF_INET,socket.SOCK_STREAM)
s.setsockopt(socket.SOL_SOCKET,socket.SO_REUSEADDR,1)
s.bind((host,port))
s.listen(1)

# One client at a time: accept, echo until the peer closes, close.
while True:
    # Stage 1: accept.  Ctrl-C propagates; any other failure is printed
    # and the loop goes back to accepting.
    try:
        clientsock,clientaddr = s.accept()
    except KeyboardInterrupt:
        raise
    except:
        traceback.print_exc()
        continue

    # Stage 2: echo every chunk back until recv() returns an empty string.
    try:
        print "Got connection from",clientsock.getpeername()
        while  True:
            data = clientsock.recv(4096)
            if not len(data):
                break
            clientsock.sendall(data)
    except (KeyboardInterrupt,SystemExit):
        raise
    except:
        traceback.print_exc()

    # Stage 3: close the client socket, printing any error from close().
    try:
        clientsock.close()
    except KeyboardInterrupt:
        raise
    except:
        traceback.print_exc()


import struct,sys

def htones(num):
    """Pack *num* as an unsigned 16-bit integer in network byte order."""
    return struct.pack('!H',num)

# Conventional spelling, offered alongside the original name.
htons=htones

def htonl(num):
    """Pack *num* as an unsigned 32-bit integer in network byte order."""
    return struct.pack('!I',num)

def ntohs(data):
    """Unpack a 2-byte network-order string into an integer."""
    # struct.unpack (previously misspelled `unoack`) returns a 1-tuple.
    return struct.unpack('!H',data)[0]

def ntohl(data):
    """Unpack a 4-byte network-order string into an integer."""
    return struct.unpack('!I',data)[0]

def sendstring(data):
    """Return *data* prefixed with its length as a 32-bit network-order integer."""
    return htonl(len(data))+data

print "Enter a string:"
str=sys.stdin.readline().rstrip()

print repr(sendstring(str))


import socket,sys

host,port = sys.argv[1:]

results = socket.getaddrinfo(host,port,0,socket.SOCK_STREAM)

for result in results:
    print "-"*60

    if result[0] == socket.AF_INET:
        print "Family: AF_INET"
    elif result[0] == socket.AF_INET6:
        print "Family: AF_INET6"
    else:
        print "Family:",result[0]

    if result[1] == socket.SOCK_STREAM:
        print "Socket Type: SOCK_STREAM"
    elif result[1] == socket.SOCK_DGRAM:
        print "Socket Type: SOCK_DGRAM"

    print "Protocol:",result[2]
    print "Canonical Name:",result[3]
    print "Socket Address:",result[4]

先找ipv4,再找ipv6
Connect Example with ipv6 Awareness ------------- ipv6connect.py

import socket,sys

def getaddrinfo_pref(host,port,socktype,familypreference=socket.AF_INET):
    """Return one getaddrinfo() entry, preferring *familypreference*.

    The first entry whose family equals *familypreference* (IPv4 by
    default) is returned; if none matches, the first entry of any family
    is returned instead.
    """
    candidates = socket.getaddrinfo(host,port,0,socktype)
    preferred = [entry for entry in candidates if entry[0] == familypreference]
    if preferred:
        return preferred[0]
    return candidates[0]

host = sys.argv[1]
port = 'http'

c = getaddrinfo_pref(host,port,socket.SOCK_STREAM)
print "Connecting to",c[4]

s = socket.socket(c[0],c[1])
s.connect(c[4])
s.sendall("HEAD / HTTP/1.0\n\n")

while True:
    buf = s.recv(4096)

    if not len(buf):
        break
    sys.stdout.write(buf)


Echo Server Bound to Specific Address
bindserver.py

import socket,traceback

host = '127.0.0.1'   # bind to this one address only
port = 51423


s = socket.socket(socket.AF_INET,socket.SOCK_STREAM)
s.setsockopt(socket.SOL_SOCKET,socket.SO_REUSEADDR,1)
s.bind((host,port))
s.listen(1)


# One client at a time: echo every chunk until recv() returns an empty
# string, then close the client socket.
while True:
    clientsock,clientaddr = s.accept()

    print "Got connection from",clientsock.getpeername()

    while True:
        data = clientsock.recv(4096)
        if not len(data):
            break
        clientsock.sendall(data)

    clientsock.close()

poll()

import socket,sys,select


port = 51423
host = 'localhost'

# Spinner state: total width, current bar position, direction of travel.
spinsize = 10
spinpos = 0
spindir = 1

def spin():
    """Redraw the one-line spinner on stdout and advance it one step.

    The bar moves by spindir on every call and turns around when it steps
    past either end of the spinsize-wide track.
    """
    global spinpos,spindir

    trailing = spinsize - spinpos - 1
    frame = '.' * spinpos + '|' + '.' * trailing
    # '\r' moves back to the start of the line so the frame is overwritten.
    sys.stdout.write('\r' + frame + ' ')
    sys.stdout.flush()

    spinpos += spindir

    if spinpos >= spinsize:
        # Stepped past the right edge: step back inside and head left.
        spinpos -= 2
        spindir = -1
    elif spinpos < 0:
        # Stepped past the left edge: restart at 1 and head right.
        spindir = 1
        spinpos = 1

s = socket.socket(socket.AF_INET,socket.SOCK_STREAM)
s.connect((host,port))

p = select.poll()

p.register(s.fileno(),select.POLLIN | select.POLLERR | select.POLLHUP)

while True:
    results = p.poll(50)

    if len(results):
        if results[0][1] == select.POLLIN:
            data = s.recv(4096)
            if not len(data):
                print ("\rRemove end closed connection ; exiting.")
                break
            sys.stdout.write("\rReceived: " + data)
            sys.stdout.flush()

        else:
            print "\rProblem occurred exitng."
            sys.exit(0)
    spin()


selectclient.py

import socket,sys,select

port = 51423
host = 'localhost'

# Spinner state: total width, current bar position, direction of travel.
spinsize = 10
spinpos = 0
spindir = 1

def spin():
    """Redraw the one-line spinner on stdout and advance it one step.

    The bar moves by spindir on every call and turns around when it steps
    past either end of the spinsize-wide track.
    """
    global spinpos,spindir

    trailing = spinsize - spinpos - 1
    frame = '.' * spinpos + '|' + '.' * trailing
    # '\r' moves back to the start of the line so the frame is overwritten.
    sys.stdout.write('\r' + frame + ' ')
    sys.stdout.flush()

    spinpos += spindir

    if spinpos >= spinsize:
        # Stepped past the right edge: step back inside and head left.
        spinpos -= 2
        spindir = -1
    elif spinpos < 0:
        # Stepped past the left edge: restart at 1 and head right.
        spindir = 1
        spinpos = 1

s = socket.socket(socket.AF_INET,socket.SOCK_STREAM)
s.connect((host,port))

while True:
    # Watch the socket for readability and for errors, waiting at most
    # 0.05 s so the spinner keeps moving.
    infds,outfds,errfds = select.select([s],[],[s],0.05)

    if len(infds):

        data = s.recv(4096)
        # An empty read ends the loop.
        if not len(data):
            print("\rRemote end closed connection; Exiting.")
            break
        sys.stdout.write("\rReceived: " + data)
        sys.stdout.flush()

    if len(errfds):
        print "\rProblen occurred; exiting."
        sys.exit(0)
    spin()

二、TCP通信方式

服务端:

#coding=utf-8
from socket import*

#监听套接字的连接和回应
#服务器端
myHost=''        # empty host string passed to bind()
myPort=50007

sockobj=socket(AF_INET,SOCK_STREAM)   # create a TCP socket object
sockobj.bind((myHost,myPort))         # bind it to the server port
sockobj.listen(5)                     # listen, allowing 5 pending connections

while True:                           # serve until the process is killed
    connection,address=sockobj.accept()    # wait for the next client
    print('Server connected by:',address)  # `connection` is a new socket
    try:
        while True:
            data=connection.recv(1024)     # read the next chunk
            if not data:break              # empty read: client is done
            # sendall() keeps sending until the whole reply is written;
            # send() may transmit only part of it.  The payload is bytes.
            connection.sendall(b'Echo get your message:'+data)
    finally:
        # Close the per-client socket even if the client resets the link.
        connection.close()

客户端:

#coding=utf-8
import socket

#客户端
import sys
from socket import *
serverHost='localhost'
serverPort=50007

message=[b'hello network world']   # default payload

# Optional arguments: server host, then one or more messages to send.
if len(sys.argv)>1:
    serverHost=sys.argv[1]
    if len(sys.argv)>2:
        message=(x.encode()for x in sys.argv[2:])

sockobj=socket(AF_INET,SOCK_STREAM)
try:
    sockobj.connect((serverHost,serverPort))

    for line in message:
        # sendall() writes the whole message; send() may write only part.
        sockobj.sendall(line)
        data=sockobj.recv(1024)   # read up to 1024 bytes of reply
        print('Client received:',data)
finally:
    # Close the socket even when connecting or sending fails.
    sockobj.close()

三、UDP通信方式

UDP请求端:

#coding=utf-8
import socket,sys

host=sys.argv[1]
textpost=sys.argv[2]   # port number, or a service name


s=socket.socket(socket.AF_INET,socket.SOCK_DGRAM)
# Use the argument as a numeric port; if it is not a number, print a
# message and look it up as a UDP service name instead.
try:
    port=int(textpost)
except ValueError:
    print "输入错入"
    port=socket.getservbyname(textpost,'udp')

s.connect((host,port))
print "Enter data to transmit: "
data=sys.stdin.readline().strip()
s.sendall(data)
print "Looking for replies."
# Print replies until an empty one is received.
while(1):
    buf=s.recv(2048)
    if not len(buf):
        break
    sys.stdout.write(buf)
UDP应答端:
#coding=utf-8
import socket,traceback

host=''      # empty host string passed to bind()
port=54132

s=socket.socket(socket.AF_INET,socket.SOCK_DGRAM)
s.setsockopt(socket.SOL_SOCKET,socket.SO_REUSEADDR,1)
s.bind((host,port))

# Send every received datagram back to its sender.  Ctrl-C and SystemExit
# propagate; any other error is printed and the loop continues.
while 1:
    try:
        message,address=s.recvfrom(8192)
        print "Got data from",address
        s.sendto(message,address)
    except(KeyboardInterrupt,SystemExit):
        raise
    except:
        traceback.print_exc()
UDP查询时间
服务端:
#coding=utf-8
import socket,traceback,time,struct

host=''      # empty host string passed to bind()
port=51432

s=socket.socket(socket.AF_INET,socket.SOCK_DGRAM)
s.setsockopt(socket.SOL_SOCKET,socket.SO_REUSEADDR,1)
s.bind((host,port))

# Answer every datagram with a 4-byte, network-order timestamp.
while 1:
    try:
        message,address=s.recvfrom(8192)
        print message,address
        secs=int(time.time())
        secs-=60*60*24   # subtract one day
        # NOTE(review): presumably the 1900-to-1970 epoch offset of the Time
        # protocol (RFC 868), which is 2208988800; this value has one digit
        # fewer.  The client in these notes subtracts the same value, so the
        # pair agrees with itself; change both together after confirming.
        secs+=220898800
        reply=struct.pack("!I",secs)
        s.sendto(reply,address)
    except(KeyboardInterrupt,SystemExit):
        raise
    except:
        traceback.print_exc()

客户端:

#coding=utf-8
import socket,sys,struct,time
hostname='localhost'
port=51432

host=socket.gethostbyname(hostname)
s=socket.socket(socket.AF_INET,socket.SOCK_DGRAM)
# An empty datagram is the request.
s.sendto('',(host,port))

print "Loking for replies"
buf=s.recvfrom(2048)[0]
# The reply must be exactly 4 bytes to unpack as one "!I" integer.
if len(buf)!=4:
    print "Wrong-size reply %d:%s"%(len(buf),buf)
    sys.exit(1)

secs=struct.unpack("!I",buf)[0]
# NOTE(review): presumably meant to be the RFC 868 offset 2208988800; this
# matches the value the server in these notes adds.  Change both together.
secs-=220898800
print time.ctime(int(secs))
超时:
#coding=utf-8
import socket,traceback

host = ''     # empty host string passed to bind()
port = 51432

s = socket.socket(socket.AF_INET,socket.SOCK_STREAM)
s.setsockopt(socket.SOL_SOCKET,socket.SO_REUSEADDR,1)
s.bind((host,port))
s.listen(1)

# NOTE(review): the heading above says "timeout", but no timeout is set
# anywhere in this listing; confirm against the intended example.
while True:
    # Stage 1: accept.  Ctrl-C propagates; any other failure is printed
    # and the loop goes back to accepting.
    try:
        clientsock,clientaddr = s.accept()
    except KeyboardInterrupt:
        raise
    except:
        traceback.print_exc()
        continue

    # Stage 2: echo every chunk back until recv() returns an empty string.
    try:
        print "Got connection from",clientsock.getpeername()
        while  True:
            data = clientsock.recv(4096)
            if not len(data):
                break
            clientsock.sendall(data)
    except (KeyboardInterrupt,SystemExit):
        raise
    except:
        traceback.print_exc()

    # Stage 3: close the client socket, printing any error from close().
    try:
        clientsock.close()
    except KeyboardInterrupt:
        raise
    except:
        traceback.print_exc()
四、ftp
#coding=utf-8
#自动抓取并打开远程文件文件
import os,sys
from getpass import getpass
from ftplib import FTP

nonpassive=False            # set True to turn passive mode off
filename='monkeys.jpg'      # file to download
dirname='.'                 # remote directory to change into
sitename='ftp.rmi.net'      # FTP site to contact
userinfo=('lutz',getpass('pwd?'))   # password is prompted for
if len(sys.argv)>1:filename=sys.argv[1]   # file name from the command line

print('Connection...')
connection=FTP(sitename)
connection.login(*userinfo)
connection.cwd(dirname)
if nonpassive:
    connection.set_pasv(False)

print('Downloading...')
localfile=open(filename,'wb')
try:
    # The command is "RETR <name>": the space after RETR is required.
    connection.retrbinary('RETR '+filename,localfile.write,1024)
finally:
    # Close the local file even if the transfer fails.
    localfile.close()
connection.quit()

if input('Open file?') in ['Y','y']:
    from PP4E.System.Media.playfile import playfile
    playfile(filename)

五、广播

接收端:

#coding=utf-8
import socket,traceback

host = ''     # empty host string passed to bind()
port = 51423

s = socket.socket(socket.AF_INET,socket.SOCK_DGRAM)
s.setsockopt(socket.SOL_SOCKET,socket.SO_REUSEADDR,1)
s.setsockopt(socket.SOL_SOCKET,socket.SO_BROADCAST,1)
s.bind((host,port))

# Reply "I am here" to the sender of every datagram.  Ctrl-C and SystemExit
# propagate; any other error is printed and the loop continues.
while True:
    try:
        message,address =s.recvfrom(8192)
        print "Got data from ",address
        s.sendto("I am here",address)
    except (KeyboardInterrupt,SystemExit):
        raise
    except:
        traceback.print_exc()

发送端:

#coding=utf-8
import socket,sys
dest = ('',51423)

s = socket.socket(socket.AF_INET,socket.SOCK_DGRAM)
s.setsockopt(socket.SOL_SOCKET,socket.SO_BROADCAST,1)
s.sendto("Hello",dest)

print "Looking for replies; press Ctrl-C to stop."

while True:
    (buf,address) = s.recvfrom(2048)
    if not len(buf):
        break
    print "Received from %s: %s" % (address,buf)

第二部分web Service

一、web客户端访问

urllib2扩展性更好
1.下载Web界面
2.在远程HTTP服务器上验证
3.提交表单(form)数据
4.处理错误
5.与非HTTP协议通信

1.下载Web界面

(1)

#coding=utf-8
import sys,urllib2

# Fetch the URL given on the command line and copy it to stdout.
fd=urllib2.urlopen(urllib2.Request(sys.argv[1]))
# iter(callable, sentinel) keeps calling read(1024) until it returns ''.
for chunk in iter(lambda: fd.read(1024), ''):
    sys.stdout.write(chunk)

sys.stdout 是标准输出文件。write就是往这个文件写数据。
合起来就是打印数据到标准输出。类似print

运行结果:

D:\python\python.exe E:/code/python/unit6/dump_page.py
http://www.example.com




    Example Domain

    
    
    
    



Example Domain

This domain is established to be used for illustrative examples in documents. You may use this domain in examples without prior coordination or asking for permission.

More information...

Process finished with exit code 0

(2)

#coding=utf-8
import sys,urllib2

req=urllib2.Request(sys.argv[1])
fd=urllib2.urlopen(req)
print "Retrieved",fd.geturl()
info=fd.info()
for key,value in info.items():
    print "%s=%s"%(key,value)

运行结果如下:
D:\python\python.exe E:/code/python/unit6/dump_info.py http://httpd.apache.org/dev
Retrieved http://httpd.apache.org/dev/
content-length=8870
accept-ranges=bytes
vary=Accept-Encoding
server=Apache/2.4.7 (Ubuntu)
last-modified=Wed, 25 Jan 2017 14:38:55 GMT
connection=close
etag="22a6-546ec313cb061"
date=Fri, 17 Mar 2017 06:29:52 GMT
content-type=text/html

Process finished with exit code 0

注:从geturl()得到的值与传入Request的对象不同,结尾处多了一条斜线,远程服务器做了一个Http转向,urllib自动跟随了转向。
其他行显示Http的header信息;

2.在远程HTTP服务器上验证

(1)

#coding=utf-8
import sys,urllib2,getpass

class TerminalPassword(urllib2.HTTPPasswordMgr):
    """Password manager that asks on the terminal when it has no entry."""

    def find_user_password(self, realm, authuri):
        """Return (username, password) for *realm* at *authuri*.

        Credentials already known to the base manager are returned as-is.
        Otherwise the user name is read from stdin and the password through
        getpass, so it is not echoed.
        """
        ret=urllib2.HTTPPasswordMgr.find_user_password(self,realm,authuri)

        if ret[0] is None and ret[1] is None:
            # "\n" ends the prompt line (it was a stray letter "n" before).
            sys.stdout.write("Login required for %s at %s\n" % (realm,authuri))
            sys.stdout.write("Username: ")
            username = sys.stdin.readline().rstrip()
            password = getpass.getpass().rstrip()
            return (username, password)
        else:
            return ret
# Open the URL through an opener whose basic-auth handler uses
# TerminalPassword, then print the response body.
req = urllib2.Request(sys.argv[1])
opener = urllib2.build_opener(urllib2.HTTPBasicAuthHandler(TerminalPassword()))
response = opener.open(req)
print response.read()

扩展urllib2.HTTPPasswordMgr类,允许程序在需要的时候向操作员询问用户名和密码,
build_opener:允许指定额外的处理程序,代码需要支持认证,所以HTTPBasicAuthHandler加到处理链接

3.提交表单(form)数据
GET方法:把表单数据编码至url,在给出请求的页面后,加一个问号,接着是表单的元素。每个键和值对用“&”分割,有些字符需要被避免。不适合数据量比较大的地方。

(1)

代码:
#coding=utf-8
import sys,urllib2

# Fetch the URL given on the command line and copy it to stdout.
fd=urllib2.urlopen(urllib2.Request(sys.argv[1]))
# iter(callable, sentinel) keeps calling read(1024) until it returns ''.
for chunk in iter(lambda: fd.read(1024), ''):
    sys.stdout.write(chunk)

sys.stdout 是标准输出文件。write就是往这个文件写数据。
合起来就是打印数据到标准输出。类似print

运行结果:
D:\python\python.exe E:/code/python/unit6/dump_page.py http://weixin.sogou.com/weixin?p=01030402&query=%E5%8D%9A%E5%AE%A2%E5%9B%AD&type=2&ie=utf8








注:必须给url加上引号

(2)
代码:

#coding=utf-8
import sys,urllib2,urllib

def addGETdata(url,data):
    return url+'?'+urllib.urlencode(data)

zipcode=sys.argv[1]
url=addGETdata('http://www.weather.com.cn/cgi-bin/findweather/getForecast',[('query',zipcode)])

print "using URL",url
req=urllib2.Request(url)
fd=urllib2.urlopen(req)
while 1:
    data=fd.read(1024)
    if not len(data):
        break
    sys.stdout.write(data)

注:函数addGETdata(url,data)负责在url结尾添加所有的数据。在内部,他在URL和通过urllib.urlencode()得到的数据间添加问号。

POST方法:单独部分发送。URL永远不会被修改,附加信息通过第二个参数传递给urlopen().

(3)
代码:

#coding=utf-8
import sys,urllib2,urllib

zipcode=sys.argv[1]
url='http://www.wunderground.com/cgi-bin/findweather/getForcecast'
# The encoded form goes to urlopen() as its second argument instead of
# being appended to the URL.
postdata=urllib.urlencode([('query',zipcode)])
fd=urllib2.urlopen(urllib2.Request(url),postdata)
# Copy the response to stdout until read() returns ''.
for chunk in iter(lambda: fd.read(1024), ''):
    sys.stdout.write(chunk)

4.处理错误

(1)
代码:

#coding=utf-8
import sys,urllib2

req=urllib2.Request(sys.argv[1])

try:
    fd=urllib2.urlopen(req)
except urllib2.URLError,e:
    print "Error reteiveving data:",e
    sys.exit(1)
print "Retrieved",fd.geturl()
info=fd.info()
for key,value in info.items():
    print "%s=%s"% (key,value)

运行结果:

D:\python\python.exe E:/code/python/unit6/error_basic.py
https://www.wunderground.com/cgi-bin/findweather/getForcecast
Error reteiveving data: HTTP Error 404: Not Found

Process finished with exit code 1

(2)
代码:

#coding=utf-8
# import sys,urllib2
#
# req=urllib2.Request(sys.argv[1])
#
# try:
#     fd=urllib2.urlopen(req)
# except urllib2.URLError,e:
#     print "Error reteiveving data:",e
#     sys.exit(1)
# print "Retrieved",fd.geturl()
# info=fd.info()
# for key,value in info.items():
#     print "%s=%s"% (key,value)

import sys,urllib2

req=urllib2.Request(sys.argv[1])

try:
    fd=urllib2.urlopen(req)
except urllib2.HTTPError,e:
    print "Error reteiveving data:",e
    print "Server error document follows:\n"
    print e.read
    sys.exit(1)
except urllib2.URLError,e:
    print "Error retriveving data",e
    sys.exit(2)

print "Retrieved",fd.geturl()
info=fd.info()
for key,value in info.items():
    print "%s=%s"% (key,value)

运行结果:

D:\python\python.exe E:/code/python/unit6/error_basic.py
https://www.wunderground.com/cgi-bin/findweather/getForcecast
Error reteiveving data: HTTP Error 404: Not Found
Server error document follows:

0x0216A5B0>>

Process finished with exit code 1

注:如果产生了一个HTTPError的实例,会捕获异常并打印细节。否则,对于urllib2.URLError类的实例,会显示一条URLError信息。

读取数据错误:
通信错误,会使socket模块调用read()函数时发生socket.error;(会通过系统层传递)
没有通信情况下发送的文档被删节;

(3)
代码:

#coding=utf-8
import sys,urllib2,socket

req=urllib2.Request(sys.argv[1])

try:
    fd=urllib2.urlopen(req)
except urllib2.HTTPError,e:
    print "Error retrieving data:",e
    print "Sever error document follows:\n"
    print e.read()
    sys.exit(1)
except urllib2.URLError,e:
    print "Error retrieving data:",e
    sys.exit(2)

print "Retrieved",fd.geturl()

bytesread=0

while 1:
    try:
        data=fd.read(1024)
    except socket.error,e:
        print "Error reading data:",e
        sys.exit(3)

    if not len(data):
        break
    bytesread+=len(data)
    sys.stdout.write(data)

    if fd.info().has_key('Content-Length') and long(fd.info()['Content-Length'])!=long(bytesread):
        print "Excepted a document of size %d,but read %d bytes"%(long(fd.info()['Content-Length']),bytesread)
        sys.exit(4)

运行结果:


> D:\python\python.exe E:/code/python/unit6/erroe_all.py
> https://www.wunderground.com/cgi-bin/findweather/getForcecast
> Error retrieving data: HTTP Error 404: Not Found
> Sever error document follows:
> 
> 
> 
> 
>   
>   
>       Error | Weather Underground
>       
>       
> 
> 

二、解析html和xhtml

第七章 解析Html 和XHtml p151-p168
1.提取标题
代码:

#coding=utf-8
from HTMLParser import HTMLParser
import sys

class TitleParser(HTMLParser):
    """Collect the text found between <title> and </title>."""

    def __init__(self):
        HTMLParser.__init__(self)
        self.title=''          # text gathered so far
        self.readingtitle=0    # non-zero while inside <title>

    def handle_starttag(self, tag, attrs):
        # Start collecting at <title>.
        if tag != 'title':
            return
        self.readingtitle = 1

    def handle_data(self, data):
        # Text outside <title> is ignored.
        if not self.readingtitle:
            return
        self.title += data

    def handle_endtag(self, tag):
        # Stop collecting at </title>.
        if tag != 'title':
            return
        self.readingtitle = 0

    def gettitle(self):
        """Return the title text gathered so far."""
        return self.title

fd = open(sys.argv[1])
tp = TitleParser()
tp.feed(fd.read())
print "Title is:",tp.gettitle()

运行结果:

D:\python\python.exe E:/code/python/unit7/basic_title.py
E:/code/python/unit7/faqs.html
Title is: Appendix?B. MySQL 5.6 Frequently Asked Questions

Process finished with exit code 0

注:从表中摘取数据,

2.改进
代码:

#coding=utf-8
from HTMLParser import HTMLParser
from htmlentitydefs import entitydefs
import sys

class TitleParser(HTMLParser):
    """Collect the <title> text, expanding named entity references."""

    def __init__(self):
        HTMLParser.__init__(self)
        self.title=''          # text gathered so far
        self.readingtitle=0    # non-zero while inside <title>

    def handle_starttag(self, tag, attrs):
        # Start collecting at <title>.
        if tag != 'title':
            return
        self.readingtitle = 1

    def handle_data(self, data):
        # Text outside <title> is ignored.
        if not self.readingtitle:
            return
        self.title += data

    def handle_endtag(self, tag):
        # Stop collecting at </title>.
        if tag != 'title':
            return
        self.readingtitle = 0

    def handle_entityref(self, name):
        # Known entities become their entitydefs value; unknown ones are
        # passed on in their literal '&name;' form.
        if name in entitydefs:
            text = entitydefs[name]
        else:
            text = '&'+name+';'
        self.handle_data(text)

    def gettitle(self):
        """Return the title text gathered so far."""
        return self.title

fd = open(sys.argv[1])
tp = TitleParser()
tp.feed(fd.read())
print "Title is:",tp.gettitle()

etitle.html




    Document Title &Intro


this is my text.

运行结果一:

D:\python\python.exe E:/code/python/unit7/basic_title.py
E:/code/python/unit7/etitle.html
Title is: Document Title Intro

Process finished with exit code 0
运行结果二:

D:\python\python.exe E:/code/python/unit7/etitle.py
E:/code/python/unit7/etitle.html
Title is: Document Title &Intro

Process finished with exit code 0

当一个实体出现时,代码检查该实体是否可以识别:如果可以,就转换为相应的值;否则原样使用输入流中的文字。

3.转换字符参考
代码:

#coding=utf-8
from HTMLParser import HTMLParser
from htmlentitydefs import entitydefs
import sys

class TitleParser(HTMLParser):
    """Collect the <title> text, expanding entity and character references."""

    def __init__(self):
        self.title=''          # text gathered so far
        self.readingtitle=0    # non-zero while inside <title>
        HTMLParser.__init__(self)

    def handle_starttag(self, tag, attrs):
        if tag =='title':
            self.readingtitle = 1

    def handle_data(self, data):
        # Text outside <title> is ignored.
        if self.readingtitle:
            self.title += data

    def handle_endtag(self, tag):
        if tag == 'title':
            self.readingtitle = 0

    def handle_entityref(self, name):
        # Known entities become their entitydefs value; unknown ones are
        # passed on in their literal '&name;' form.
        if name in entitydefs:
            self.handle_data(entitydefs[name])
        else:
            self.handle_data('&'+name+';')

    def handle_charref(self, name):
        # Only decimal references are converted; anything int() rejects
        # is dropped.
        try:
            charnum=int(name)
        except ValueError:
            return
        # Accept 1..255.  The upper bound was mistyped as 225, which
        # silently dropped the characters 226..255.
        if charnum<1 or charnum>255:
            return
        self.handle_data(chr(charnum))

    def gettitle(self):
        """Return the title text gathered so far."""
        return self.title

fd = open(sys.argv[1])
tp = TitleParser()
tp.feed(fd.read())
print "Title is:",tp.gettitle()

4.处理不均衡的标签
代码:

#coding=utf-8
from HTMLParser import HTMLParser
from htmlentitydefs import entitydefs
import sys,re

class TitleParser(HTMLParser):
    """Report the title and list items of HTML with unbalanced tags.

    A stack of open tags is kept.  A start tag identical to the tag on top
    of the stack implicitly closes it (e.g. <li> following <li>), and an
    end tag closes every tag opened after its matching start tag.
    """

    def __init__(self):
        self.taglevels=[]                       # stack of currently open tags
        self.handledtags=['title','ul','li']    # tags that produce output
        self.processing=None                    # handled tag collecting text
        self.data=''                            # text of the tag in progress
        self.title=''                           # last completed <title> text
        HTMLParser.__init__(self)

    def handle_starttag(self, tag, attrs):
        # A repeated tag closes the previous one of the same kind.
        if len(self.taglevels) and self.taglevels[-1] == tag:
            self.handle_endtag(tag)

        self.taglevels.append(tag)
        if tag in self.handledtags:
            self.data = ''
            self.processing = tag
            if tag == 'ul':
                print("List start")

    def handle_data(self, data):
        if self.processing:
            self.data += data

    def handle_endtag(self, tag):
        # An end tag with no matching start tag is ignored.
        if tag not in self.taglevels:
            return

        # Pop back to the matching start tag, finishing every handled tag
        # that was left open above it.
        while len(self.taglevels):
            starttag = self.taglevels.pop()

            if starttag in self.handledtags:
                self.finishprocessing(starttag)

            if starttag == tag:
                break

    def cleanse(self):
        """Collapse each run of whitespace in the collected text to one space."""
        self.data = re.sub(r'\s+', ' ', self.data)

    def finishprocessing(self, tag):
        """Print the result for a handled tag that has just been closed."""
        self.cleanse()
        if tag == 'title' and tag == self.processing:
            # Keep the title so gettitle() has something to return; the
            # attribute was never assigned before.
            self.title = self.data
            print("Dom title %s" % self.data)
        elif tag == 'ul':
            print("List ended")
        elif tag == 'li' and tag == self.processing:
            print("List item %s" % self.data)
        self.processing = None

    def gettitle(self):
        """Return the text of the last completed <title> ('' if none)."""
        return self.title

    # Special values: an entity found in the mapping table is replaced by
    # its mapped value; any other entity is kept as literal text.
    def handle_entityref(self, name):
        if name in entitydefs:
            self.handle_data(entitydefs[name])
        else:
            self.handle_data('&' + name + ';')

    def handle_charref(self, name):
        # Only decimal references in the range 1..255 are converted.
        try:
            charnum = int(name)
        except ValueError:
            return

        if charnum < 1 or charnum > 255:
            return

        self.handle_data(chr(charnum))

# Parse the HTML file named on the command line; the parser prints as it goes.
tp = TitleParser()
# The with-block closes the file once it has been read.
with open(sys.argv[1]) as fd:
    tp.feed(fd.read())

运行结果:

D:\python\python.exe E:/code/python/unit7/4un.py
E:/code/python/unit7/4un.html
Dom title DOCTYPE Title & Intro?
List start
List item First List item
List item second list item
List item second list item
List ended

Process finished with exit code 0

5.一个可以实际工作的例子

三、XML和XML-RPC

P169-p190
展示XML文档:tree,event.基于事件的解析器可以扫描文档,事件解析器可以响应。
8.2 使用Dom
代码:

#coding=utf-8
from xml.dom import minidom,Node

def scanNode(node,level=0):
    """Print the class name of *node* and of all its descendants.

    One line is printed per node, indented four spaces per *level*.
    Element nodes also show ",tag" followed by their tag name.
    """
    msg = node.__class__.__name__
    if node.nodeType == Node.ELEMENT_NODE:
        msg += ",tag" + node.tagName
    # Single-argument form that prints the same text as the statement
    # `print " " * level * 4, msg` (indent, one space, message).
    print("%s %s" % (" " * level * 4, msg))
    # hasChildNodes is a method: without the call the test was always true.
    if node.hasChildNodes():
        for child in node.childNodes:
            scanNode(child, level + 1)


# Parse Sample.xml from the current directory and print its node tree.
doc = minidom.parse("Sample.xml")
scanNode(doc)

运行结果:

D:\python\python.exe E:/code/python/unit8/un1.py

 Document
>      Element,tagbook
>          Text
>          Element,tagtitle
>              Text
>          Text
>          Element,tagauthor
>              Text
>              Element,tagname
>                  Text
>                  Element,tagfirst
>                      Text
>                  Text
>                  Element,taglast
>                      Text
>                  Text
>              Text
>              Element,tagaffiliation
>                  Text
>              Text
>          Text
>          Element,tagchapter
>              Text
>              Element,tagtitle
>                  Text
>              Text
>              Element,tagpara
>                  Text
>                  Element,tagcompany
>                      Text
>                  Text
>              Text
>          Text

Process finished with exit code 0

sample.xml



     Sample XML Thing 
    
        
            Benjamin
            Smith
        
        Springy Widgets,Inc.
    

    
        First chapter
        
            I think widgets are great.you should buy lots
            of them from Springy widgets,Inc
        
    

2.使用dom完全解析
代码:

#coding=utf-8
"""
将XML以文本形式重新格式化输出
1.使用Node的节点类型,判断下一步如何处理
2.对不同的节点名(tagName)进行相应的处理
"""
from xml.dom import minidom, Node
import re, textwrap


class SampleScanner:
    """Walk a parsed sample document and print it as plain text."""

    def __init__(self, doc):
        """Process every top-level <book> element of *doc*; ignore the rest."""
        for child in doc.childNodes:
            if child.nodeType == Node.ELEMENT_NODE and child.tagName == "book":
                self.handleBook(child)

    def gettext(self, nodelist):
        """Return the text contained in *nodelist*, whitespace-normalised.

        Text nodes contribute their text; any other node that has children
        contributes the text of its children, recursively.
        """
        retlist = []
        for node in nodelist:
            if node.nodeType == Node.TEXT_NODE:
                retlist.append(node.wholeText)
            # hasChildNodes is a method and must be called; the bare
            # attribute was always true.
            elif node.hasChildNodes():
                retlist.append(self.gettext(node.childNodes))

        return re.sub(r"\s+", " ", "".join(retlist))

    def handleBook(self, node):
        """Process a <book> element.

        Non-element children are skipped; <title> is printed directly,
        <author> and <chapter> go to their own handlers.
        """
        for child in node.childNodes:
            if child.nodeType != Node.ELEMENT_NODE:
                continue
            if child.tagName == "title":
                print("Book title is : %s" % self.gettext(child.childNodes))
            elif child.tagName == "author":
                self.handleAuthor(child)
            elif child.tagName == "chapter":
                self.handleChapter(child)

    def handleAuthor(self, node):
        """Process an <author> element.

        Non-element children are skipped; <name> goes to handleAuthorName
        and the text of <affiliation> is printed.
        """
        for child in node.childNodes:
            if child.nodeType != Node.ELEMENT_NODE:
                continue
            if child.tagName == "name":
                self.handleAuthorName(child)
            elif child.tagName == "affiliation":
                print("Author affiliation: %s" % self.gettext([child]))

    def handleAuthorName(self, node):
        """Print the <last> and <first> text found under an author <name>."""
        surname = self.gettext(node.getElementsByTagName("last"))
        givenname = self.gettext(node.getElementsByTagName("first"))

        print("Author Name:%s %s " % (surname, givenname))

    def handleChapter(self, node):
        """Print a chapter heading, then each <para> child of the chapter."""
        print("*** Start of Chapter %s,%s" % (
            node.getAttribute("number"), self.gettext(node.getElementsByTagName("title"))))

        for child in node.childNodes:
            if child.nodeType != Node.ELEMENT_NODE:
                continue
            if child.tagName == "para":
                self.handlePara(child)

    def handlePara(self, node):
        """Print the text of a <para> element, re-wrapped by textwrap.fill."""
        paratext = self.gettext([node])
        paratext = textwrap.fill(paratext)
        print(paratext)


# Parse Sample.xml from the current directory; constructing the scanner
# prints the report.
doc = minidom.parse("Sample.xml")
SampleScanner(doc)

运行结果:

D:\python\python.exe E:/code/python/unit8/un2.py
Book title is : Sample XML Thing
Author Name:Smith Benjamin
Author affiliation: Springy Widgets,Inc.
*** Start of Chapter 1,First chapter
I think widgets are great.you should buy lots of them from Springy
widgets,Inc

Process finished with exit code 0

3.使用Dom产生文档
代码:

#coding=utf-8
"""
Build an XML document with minidom.

Steps demonstrated:
1. Create elements with createElement.
2. Attach child nodes with appendChild.
3. Create text nodes with createTextNode.
4. Set an attribute with setAttribute.
"""
from xml.dom import minidom,Node

# appendChild returns the node it was given, so each element can be created
# and attached to its parent in one step.
doc = minidom.Document()

# Root <book> element.
book = doc.appendChild(doc.createElement("book"))

# <title> of the book.
title = book.appendChild(doc.createElement("title"))
title.appendChild(doc.createTextNode("Sample XML Thing"))

# <author> containing <name> with <first> and <last>.
author = book.appendChild(doc.createElement("author"))
name = author.appendChild(doc.createElement("name"))
for tag, value in (("first", "Benjamin"), ("last", "Smith")):
    part = name.appendChild(doc.createElement(tag))
    part.appendChild(doc.createTextNode(value))

# <chapter number="1"> with its own <title>.
chapter = book.appendChild(doc.createElement("chapter"))
chapter.setAttribute("number", "1")
heading = chapter.appendChild(doc.createElement("title"))
heading.appendChild(doc.createTextNode("Fisrt Chapter"))

# <para> with mixed content: leading text followed by a <company> element.
para = chapter.appendChild(doc.createElement("para"))
para.appendChild(doc.createTextNode(
    "I think widgets are great.you should buy lots "
    "of them from"))
company = para.appendChild(doc.createElement("company"))
company.appendChild(doc.createTextNode("Springy widgets,Inc"))

# Serialize with a one-space indent per nesting level.
print(doc.toprettyxml(indent=" "))

运行结果:

D:\python\python.exe E:/code/python/unit8/un3.py



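<?xml version="1.0" ?>
<book>
 <title>Sample XML Thing</title>
 <author>
  <name>
   <first>Benjamin</first>
   <last>Smith</last>
  </name>
 </author>
 <chapter number="1">
  <title>Fisrt Chapter</title>
  <para>
   I think widgets are great.you should buy lots of them from
   <company>Springy widgets,Inc</company>
  </para>
 </chapter>
</book>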

Process finished with exit code 0

4.dom类型参考

8.3使用xml-rpc

  1. 代码:
#coding=utf-8
import xmlrpclib
# NOTE(review): this URL looks like a blog page rather than an XML-RPC
# endpoint -- confirm the intended server address.
url='http://liandesinian.blog.51cto.com/7737219/1565474'
s = xmlrpclib.ServerProxy(url)
# Ask the server for its categories via the meerkat.getCategories call.
catdata = s.meerkat.getCategories()
# Print the 'title' of every returned entry in sorted order, one per line.
for title in sorted(entry['title'] for entry in catdata):
    print(title)

运行结果:

D:\python\python.exe E:/code/python/unit8/un6.py

Process finished with exit code 0

  2. 代码:
#coding=utf-8
import xmlrpclib,sys,textwrap

class NewsCat:
    """A news category built from a mapping with 'id' and 'title' entries."""

    def __init__(self,catdata):
        # Keep only the two fields of the category record that are used.
        self.id, self.title = catdata['id'], catdata['title']

    def __cmp__(self, other):
        """Order categories by title (Python 2 comparison protocol)."""
        titles = (self.title, other.title)
        return cmp(*titles)

class NewsSource:
    """Interactive text browser for an XML-RPC news service.

    On construction a server proxy is created and the category list is
    loaded into ``self.cats`` (a title-sorted list of NewsCat objects).
    The remaining methods display categories and items on stdout and read
    the user's choices from stdin.
    """

    def __init__(self,url='http://www.oreillynet.com/meerkat/xml-rpc/server.php'):
        """Connect to the XML-RPC server at *url* and load its categories."""
        self.s = xmlrpclib.ServerProxy(url)
        self.loadcats()

    def loadcats(self):
        """Fetch the categories from the server and store them sorted."""
        print("Loading categories....")
        # Same remote method name the stand-alone example uses.
        catdata = self.s.meerkat.getCategories()
        self.cats = [NewsCat(item) for item in catdata]
        self.cats.sort()

    def displaycats(self):
        """Write the numbered category titles to stdout, three per row."""
        for position, item in enumerate(self.cats, 1):
            sys.stdout.write("%2d:%20.20s" % (position, item.title))
            if position % 3 == 0:
                sys.stdout.write("\n")
        # Terminate the listing whenever anything was written.
        if self.cats:
            sys.stdout.write("\n")

    def promotcat(self):
        """Show the categories and read the user's selection.

        Returns the zero-based position of the chosen category in
        ``self.cats``. Entering 'q' exits the program via sys.exit(0).
        Raises ValueError when the input is not an integer.
        """
        self.displaycats()
        sys.stdout.write("select a catgory or q to quit")
        selection = sys.stdin.readline().strip()
        if selection == 'q':
            sys.exit(0)
        return int(selection) - 1

    def dispact(self, cat):
        """Browse the items of one category until the user enters 'q'.

        *cat* is sent to the server as the 'category' field of the
        meerkat.getItems request. When no items come back, a message is
        shown and the method returns after the user presses Enter.
        """
        # NOTE(review): 'data' here (and item['data'] in dispitem) may be a
        # typo for the service's date field -- confirm against the API.
        items = self.s.meerkat.getItems({'category': cat,
                                         'ids': 1,
                                         'descriptions': 1,
                                         'categories': 1,
                                         'channels': 1,
                                         'data': 1,
                                         'num_items': 15})
        if not items:
            print("Sorry,no items in that category.")
            sys.stdout.write("Press Enter to continue:")
            sys.stdin.readline()
            return
        while 1:
            # dispitemsummary prints by itself; printing its return value
            # would emit a stray "None".
            self.dispitemsummary(items)
            sys.stdout.write("select a catgory or q to quit")
            selection = sys.stdin.readline().strip()
            if selection == 'q':
                return

            self.dispitem(items[int(selection) - 1])

    def dispitemsummary(self, items):
        """Print one numbered line with the 'title' of each item."""
        for position, item in enumerate(items, 1):
            print("%2d:%s" % (position, item['title']))

    def dispitem(self, item):
        """Print the details of one item and wait for the user to press Enter."""
        print("---%s---" % item['title'])
        print("Posted on %s" % (item['data'],))
        print("Description:")
        print(textwrap.fill(item['description']))
        print("\nlink: %s" % (item['link'],))
        sys.stdout.write("\nPress Enter to continue: ")
        sys.stdin.readline()


if __name__ == "__main__":
    n = NewsSource()
    while 1:
        cat = n.promotcat()
        # promotcat returns a position in the title-sorted list, so the
        # matching category's id is what gets sent to the server.
        # NOTE(review): assumes the service filters by category id rather
        # than by list position -- confirm against the Meerkat API.
        n.dispact(n.cats[cat].id)
posted @ 2017-03-18 12:08 BugsTerminator 阅读( ...) 评论( ...) 编辑 收藏
刷新评论 刷新页面 返回顶部

转载于:https://www.cnblogs.com/chance88/p/6572879.html

你可能感兴趣的:(python网络编程学习笔记(一))