# Tkinter scraper for Zealer and Mydrivers -- with proxy support

import urllib2



class UseProxy(object):
    """Factory for an authenticated urllib2 proxy opener.

    The credentials and proxy host below are placeholders; replace
    'aaaa' / 'bbbb' / 'xxx.yyy.zzz:8080' with real values before use.
    """

    def __init__(self):
        self.user = 'aaaa'
        self.password = 'bbbb'
        self.proxyserver = 'xxx.yyy.zzz:8080'
        # Kept for interface compatibility with earlier revisions; not
        # written by getproxy() (the fetch was moved to the callers).
        self.content = ''

    def getproxy(self):
        """Return a urllib2 opener that routes requests through the proxy.

        Registers the authenticated proxy for both 'http' and 'https'
        (the original handled http only); callers fetching plain http
        URLs are unaffected by the extra scheme.
        """
        proxy = 'http://%s:%s@%s' % (self.user, self.password, self.proxyserver)
        proxy_handler = urllib2.ProxyHandler({'http': proxy, 'https': proxy})
        opener = urllib2.build_opener(proxy_handler, urllib2.HTTPHandler)
        return opener
# --- end of UseProxy.py ---

from urlparse import urljoin

import re

from UseProxy import *

from bs4 import BeautifulSoup



class GetZealerVideo(object):
    """Scrapes video post links and titles from www.zealer.com."""

    def __init__(self):
        self.url = 'http://www.zealer.com'
        self.content = ''
        self.lists = []

    def splitcontent(self, proxyset):
        """Fetch the Zealer front page via *proxyset* and return a flat
        list alternating [post_url, title, post_url, title, ...].

        proxyset -- a UseProxy instance supplying getproxy().
        Titles are returned utf-8 encoded (matching the original output).
        """
        self.content = proxyset.getproxy().open(self.url).read().decode('utf-8')
        soup = BeautifulSoup(self.content, "html.parser")
        founddiv = soup.findAll('div', {'class': 'subject'})
        foundli = soup.findAll('div', {'id': re.compile("^li_layer")})
        self.lists = []  # reset so repeated calls do not accumulate
        # Titles and link containers must pair up one-to-one; if the
        # counts differ the page layout changed, so return nothing
        # rather than mismatched url/title pairs.
        if len(founddiv) == len(foundli):
            for subject, layer in zip(founddiv, foundli):
                match = re.search(r'/post(/\d+)*', str(layer))
                if match is None:
                    # No post link inside this container (layout drift);
                    # skip it instead of crashing on match.group().
                    continue
                self.lists.append(urljoin(self.url, match.group()))
                self.lists.append(subject.contents[0].encode('utf-8'))
        return self.lists

                    

if __name__ == '__main__':

    gvideo = GetZealerVideo()

    proxyset = UseProxy()

    print '.'.join(gvideo.splitcontent(proxyset)).decode('utf-8')
# --- end of GetZealerVideo.py ---

from UseProxy import *

from bs4 import BeautifulSoup



class GetMydrivers(object):
    """Scrapes headline entries from www.mydrivers.com (a GBK-encoded site)."""

    def __init__(self):
        self.url = 'http://www.mydrivers.com'
        self.content = ''
        self.lists = []

    def splitcontent(self, proxyset):
        """Fetch the Mydrivers front page via *proxyset* and return the
        list of <span class="titl"> headline contents.

        proxyset -- a UseProxy instance supplying getproxy().
        """
        self.content = proxyset.getproxy().open(self.url).read()
        # The site serves GBK; decode with gb18030, a strict superset.
        soup = BeautifulSoup(self.content, "html.parser", from_encoding="gb18030")
        print(soup.original_encoding)  # debug: confirm detected encoding
        founddiv = soup.findAll('span', {'class': 'titl'})
        # Reset before filling: the original appended on every call, so a
        # second splitcontent() on the same instance duplicated entries.
        self.lists = []
        for span in founddiv:
            self.lists.append(span.contents[0])
        return self.lists



if __name__ == '__main__':

    gnews = GetMydrivers()

    proxyset = UseProxy()

    lists = gnews.splitcontent(proxyset)

    for l in lists:

            print str(l).decode('utf-8').encode('gb18030')
# --- end of GetMydrivers.py ---


# -*- coding: utf-8 -*-

from Tkinter import *

from time import ctime

import os

import re

import GetZealerVideo as soup

import GetMydrivers as mnews

from UseProxy import *



class GetResource(object):

    def __init__(self):

        self.win = Tk()



        self.l1 = StringVar(self.win)

        self.msg = ""

        self.frame = Frame(width=800, height=600, bg='white')

        # self.frame.grid_propagate(False)

        # self.frame.grid()

        self.frame.propagate(False)

        self.frame.pack()



        self.scroll = Scrollbar(self.frame)

        self.scroll.pack(side=RIGHT, fill=Y)

        # self.scroll.grid(row=0, column=1)

        self.listbox = Listbox(self.frame, selectbackground='blue', font='12', heigh=550, width=750, yscrollcommand=self.scroll.set,

                               xscrollcommand=self.scroll.set)

        self.listbox.pack(side=TOP, fill=BOTH)

        # self.listbox.grid(row=0, column=0)

        self.listbox.bind('<Double-1>', self.get_select)



        self.frame2 = Frame(width=800, height=50, bg='white')

        self.frame2.propagate(False)

        self.frame2.pack()

        # self.frame2.grid_propagate(False)

        # self.frame2.grid()

        Button(self.frame2, text=u'Get Zealer', command=self.zealer_video).pack(expand=YES)

        # Button(self.frame2, text=u'Get Zealer', command=self.zealer_video).grid(row=0, column=0)



        Button(self.frame2, text=u'Get Mydrivers', command=self.my_drivers).pack(expand=YES)

        # Button(self.win, text=u'Get Mydrivers', command=self.my_drivers).grid(row=1, column=1)



    def my_drivers(self):

        print 'start get at:', ctime()

        self.listbox.delete(0, END)

        self.getm = mnews.GetMydrivers()

        proxyset = UseProxy()

        for l in self.getm.splitcontent(proxyset):

            s = str(l).decode('utf-8')

            try:

                self.listbox.insert(END, re.findall(r'(?<=href=").+?(?=">)', s)[0]+"\r\n")

                self.listbox.insert(END, re.findall(r'(?<=>).+?(?=<)', s)[0]+"\r\n")

                self.listbox.update()

            except IndexError:

                pass

        print 'get done at:', ctime()



    def zealer_video(self):

        print 'start get at:', ctime()

        self.listbox.delete(0, END)

        self.getz = soup.GetZealerVideo()

        proxyset = UseProxy()

        for l in self.getz.splitcontent(proxyset):

            self.listbox.insert(END, l+"\r\n")

            self.listbox.update()

        print 'get done at:', ctime()



    def get_select(self, ev=None):

        self.listbox.config(selectbackground='red')

        print self.listbox.curselection()

        self.check = self.listbox.get(self.listbox.curselection())

        if self.check:

            if re.match('http', self.check):

                os.startfile(self.check)



def main():
    """Build the GUI and enter the Tk event loop."""
    GetResource()
    mainloop()


if __name__ == '__main__':
    main()

 

# (blog footer, translated: "You may also be interested in: driver")