Scraping Lianjia second-hand housing listings with python requests and storing them in MySQL

import requests
import re
import time
import warnings
import pymysql

class LianjiaSpider:
    
    def __init__(self):
        # page number is appended in workOn(), e.g. .../ershoufang/pg2/
        self.baseurl = 'https://bj.lianjia.com/ershoufang/pg'
        self.headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.71 Safari/537.1 LBBROWSER'}
        # Create the connection object (recent pymysql versions require
        # keyword arguments here, not positional ones)
        self.db = pymysql.connect(host='localhost', user='root',
                                  password='123456', charset='utf8')
        # Create the cursor
        self.cursor = self.db.cursor()
    
    def getPage(self, url):
        res = requests.get(url, headers=self.headers)
        res.encoding = 'utf-8'
        self.parsePage(res.text)
        # brief pause between requests to stay polite
        time.sleep(0.2)
    
    
    def parsePage(self, html):
        # The HTML tags inside the original pattern were swallowed when the
        # post was published; the tags below are a plausible reconstruction
        # and may need adjusting to whatever markup Lianjia currently serves.
        # Nine capture groups: title, address, model, size, direction,
        # floor, start_year, position, price.
        patterns = ('<div class="title">.*?<a.*?>(.*?)</a>.*?'
                    '<div class="houseInfo">.*?>(.*?)</a>/(.*?)/(.*?)/(.*?).*?'
                    '<div class="positionInfo">.*?>(.*?)/(.*?)/(.*?)<.*?'
                    '<div class="totalPrice">.*?<span>(.*?)</span>')
        r = re.compile(patterns, re.S)
        r_list = r.findall(html)
        self.writeToText(r_list)

    def writeToText(self, r_list):
        c_db = 'create database if not exists lianjia charset utf8'
        u_db = 'use lianjia'
        c_table = """create table if not exists ershoufang(
                         id int primary key auto_increment,
                         title varchar(100),
                         address varchar(100),
                         model varchar(100),
                         size varchar(100),
                         direction varchar(100),
                         floor varchar(30),
                         start_year varchar(20),
                         position varchar(30),
                         price varchar(10));"""
        ins = ('insert into ershoufang(title,address,model,size,direction,'
               'floor,start_year,position,price) '
               'values(%s,%s,%s,%s,%s,%s,%s,%s,%s)')
        # Silence the "database/table already exists" warnings on reruns
        warnings.filterwarnings('ignore')
        try:
            self.cursor.execute(c_db)
            self.cursor.execute(u_db)
            self.cursor.execute(c_table)
        except Exception:
            pass
        for r_tuple in r_list:
            L = []
            for r_str in r_tuple:
                L.append(r_str.strip())
            print(L)
            # execute(ins, [list]) fills the nine %s placeholders safely
            self.cursor.execute(ins, L)
            self.db.commit()
            print('Saved to database')

    def workOn(self):
        num = int(input('How many pages do you want to crawl?\n'))
        for n in range(1, num + 1):
            # listing pages look like .../ershoufang/pg2/
            url = self.baseurl + str(n) + '/'
            self.getPage(url)
            print('Page ' + str(n) + ' crawled successfully')
        self.cursor.close()
        self.db.close()


if __name__ == '__main__':
    spider = LianjiaSpider()
    spider.workOn()
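
To sanity-check what actually landed in MySQL, a short standalone query script helps. This is a minimal sketch, assuming the same localhost/root/123456 connection settings and the lianjia database and ershoufang table created by the spider above:

import pymysql

# Assumes the connection settings used by the spider above.
db = pymysql.connect(host='localhost', user='root', password='123456',
                     database='lianjia', charset='utf8')
cursor = db.cursor()
# Count how many listings were stored
cursor.execute('select count(*) from ershoufang')
print('rows stored:', cursor.fetchone()[0])
# Spot-check a few rows
cursor.execute('select title, price from ershoufang limit 3')
for row in cursor.fetchall():
    print(row)
cursor.close()
db.close()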

Sample run:

[Screenshot of the run result]
