淘宝抓取宝贝买家秀核心代码

原文链接: https://www.cnblogs.com/secondtonone1/p/10221927.html
#淘宝抓取宝贝买家秀核心代码
def getPhoto(self,*comentlist):
    """Save the text and the photos of each review element to disk.

    For every element in ``comentlist`` the method:

    1. reads the text of the child with class ``tm-rate-fulltxt``
       (falling back to ``'abcdef'`` when it is empty);
    2. uses the first six characters of that text, made safe for use as a
       file name, as a directory name under ``self.path``;
    3. writes the full text to ``<dir>/<prefix>.txt`` unless that file
       already exists;
    4. fetches every ``<li>`` photo found under ``tm-m-photos`` /
       ``tm-m-photos-thumb`` through ``self.session_`` and stores it in the
       same directory, skipping images that are already present and
       sleeping two seconds after each download.

    Args:
        *comentlist: Review elements exposing ``find_element_by_class_name``
            (presumably Selenium ``WebElement`` objects -- confirm against
            the caller).

    Returns:
        None. Errors are reported on stdout and handled per review, so a
        review without photos no longer stops the remaining reviews.

    NOTE(review): ``find_element_by_*`` helpers were removed in recent
    Selenium releases; confirm the pinned Selenium version before upgrading.
    """
    # Characters that are invalid in Windows file names; '/' would also
    # split the name into nested directories on POSIX systems.
    forbidden = set('\\/:*?"<>|')

    for comments in comentlist:
        # The try block lives inside the loop so that one failing review
        # (for example one without any photo) does not abort the others.
        try:
            desc = comments.find_element_by_class_name('tm-rate-fulltxt').text
            if not desc:
                desc = 'abcdef'

            # Replace unsafe and control characters, and drop trailing
            # dots/spaces, so the prefix is always a single valid path part.
            dirfix = ''.join(
                '_' if (ch in forbidden or ord(ch) < 32) else ch
                for ch in desc[0:6]
            ).rstrip(' .') or 'abcdef'

            dirname = os.path.join(self.path, dirfix)
            os.makedirs(dirname, exist_ok=True)

            txtname = os.path.join(dirname, dirfix + '.txt')
            if not os.path.exists(txtname):
                with open(txtname, 'w', encoding='utf-8') as file:
                    file.write(desc)

            photos = comments.find_element_by_class_name('tm-m-photos')
            photos = photos.find_element_by_class_name('tm-m-photos-thumb')
            photos = photos.find_elements_by_tag_name('li')
            for ph in photos:
                phaddr = ph.get_attribute('data-src')
                if not phaddr:
                    # No address on this <li>; nothing to download.
                    continue
                print(phaddr)
                # Keep everything before the first '_4'.
                # NOTE(review): presumably this strips a thumbnail size
                # suffix such as '_40x40.jpg' -- confirm the URL format.
                bigph = phaddr.split('_4')[0]
                print(bigph)
                imgname = os.path.join(dirname, bigph.split('/')[-1])
                if os.path.exists(imgname):
                    continue
                # Only prepend a scheme when the address does not have one.
                if bigph.startswith(('http://', 'https://')):
                    url = bigph
                else:
                    url = 'http:' + bigph
                img = self.session_.get(url, headers=self.headers_, cookies=self.cookiejar_).content
                print('正在爬取%s' %(bigph))
                with open(imgname, 'wb') as imgfile:
                    imgfile.write(img)
                print('爬取成功%s' %(bigph))
                # Pause between downloads.
                time.sleep(2)
        except NoSuchElementException:
            print('No Element')
        except TimeoutException:
            print('TimeoutException')
        except Exception as exc:
            # Best-effort scraping: report the failure (with its cause) and
            # move on, but let KeyboardInterrupt/SystemExit propagate.
            print('getPhoto exception', exc)

还未验证。先收藏保留。

你可能感兴趣的:(python学习)