用Python爬一大波美女妹子

python版本:2.7
和大家一起交流编程心得,如果代码有需要改进的地方,希望大家多提意见

一大波的妹子福利哦

#coding:utf-8
import urllib2,re,os
from urllib import *
from time import sleep
 
class spider:
  def __init__(self):
    self.lst_girl=[]
    self.lst_fail=[]
    self.lst_use=[]
    self.PATH=os.getcwd()
    self.host='http://www.zngirls.com'
  def saveimg(self,fdir,img_url):
    fn=img_url.split('/')
    try:
      data=urllib2.urlopen(img_url,timeout=20).read()
      f=open(fdir+'\\'+fn[-1],'wb')
      f.write(data)
      f.close()
      print 'save image ===========  ok'
    except:
      print 'save image error ==== OK'
      f=open(fdir+'\\err.txt','w')
      f.write(img_url)
      f.close()
     
  def mkdir(self,fdir):
    """Create directory ``fdir`` (with any missing parents) unless the path already exists."""
    if os.path.exists(fdir):
      # Something is already at that path; leave it untouched.
      return
    os.makedirs(fdir)
 
  #获取所有列表
  def getgirllist(self):
    """Collect the '/girl/<digits>' paths returned by the site's query endpoint.

    Posts page after page (pagesize 20) to girl_query_total.ashx with the
    country filter, gathers every '/girl/<digits>' match from the
    responses, writes the de-duplicated paths to 'list.txt' (one per line,
    relative to the current directory) and returns them as a list.  The
    order of the result is unspecified because it comes from a set.

    Network errors raised by urllib2 (including the 20 second timeout)
    are not caught here and propagate to the caller.
    """
    url='http://www.zngirls.com/ajax/girl_query_total.ashx'
    c='%E9%9F%A9%E5%9B%BD'  # country: percent-encoded UTF-8 for Korea
    #p='%E8%BD%A6%E6%A8%A1'  # profession: car model (filter currently unused)
    # Unquote twice in case the value is double-encoded; the second pass
    # is a no-op for a singly encoded value.
    country=unquote(unquote(c))
    hd={'Host':'www.zngirls.com',
        'User-Agent':'Mozilla/5.0 (Windows NT 5.1; rv:17.0) Gecko/20100101 Firefox/17.0',
        'Referer':'http://www.zngirls.com/find/',
        'X-Requested-With':'XMLHttpRequest'
        }
    # Compiled once instead of on every page.
    pat=re.compile(r'/girl/\d+')
    lst_count=[]
    i=1
    while True:
      postdata={'country':country,
              'curpage':str(i),
              'pagesize':'20'
              }
      req=urllib2.Request(url,urlencode(postdata),hd)
      # Same timeout as saveimg so a stalled server cannot hang the crawl.
      resp=urllib2.urlopen(req,timeout=20)
      try:
        html=resp.read()
      finally:
        resp.close()
      lst_url=pat.findall(html)
      lst_count+=lst_url
      print('初始化完成页数: ' +str(i))
      # A page with at most one match is treated as the end of the listing.
      if len(lst_url)<=1:
        break
      i+=1

    glst=list(set(lst_count))
    with open('list.txt','w') as fp:
      for s in glst:
        fp.write(s+'\n')
    print('初始化完成 ================ OK')
    print('获取数据长度: '+ str(len(glst)))
    return glst
   
    #处理数据
  def solvedata(self,html):
    pat=re.compile("value='(.*?)'")
    found=re.findall(pat,html)
     
    ipat=re.compile('(.*?)',re.S)
    tmp=ipat.search(html).group(1)
    info=re.sub('<[^>]+>','',tmp)
    info=info.replace(' ','')
    fdir=os.getcwd()+'\\spider\\'+found[0]
    print fdir
    self.mkdir(fdir)
    fp=open(fdir+'\\list.txt','w')
    for opt in found:
      fp.write(opt+'\n')
    fp.write(info)
    fp.close()
    print 'write file ======  ok'
    #===image ================
    im=re.compile("class='imglink' href='(.*?)'>



你可能感兴趣的:(用python爬大一波美女妹子)