import pymysql
import requests
from bs4 import BeautifulSoup
def get_page(url, timeout=10):
    """Download a page and parse it into a BeautifulSoup tree.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before requests gives up.
            Without a timeout a stalled server would block the call forever.

    Returns:
        A BeautifulSoup object built from the response body with the
        'lxml' parser.
    """
    response = requests.get(url, timeout=timeout)
    return BeautifulSoup(response.text, 'lxml')
def get_links(url_link):
    """Collect the listing links found on an index page.

    Args:
        url_link: Address of the page that lists the houses.

    Returns:
        A list with the ``href`` of the first anchor inside every
        ``<div class="pic-panel">`` on the page. Divs that contain no
        anchor are skipped.
    """
    # Reuse get_page so fetching and parsing live in one place.
    soup = get_page(url_link)
    panels = soup.find_all('div', class_='pic-panel')
    # div.a is None when a panel has no <a> child; skip those instead of
    # failing with AttributeError.
    return [panel.a.get('href') for panel in panels if panel.a is not None]
def get_house_info(house_url):
    """Scrape a single listing page and return its main attributes.

    Args:
        house_url: Address of the listing detail page.

    Returns:
        A dict of strings keyed by '价格' (price), '单位' (unit),
        '面积' (area), '户型' (layout), '楼层' (floor) and '朝向'
        (direction).

    Raises:
        AttributeError: If the page has no ``span.total`` or ``span.unit``
            element (``soup.find`` returns None in that case).
        IndexError: If the page contains fewer than four ``<p>`` elements.
    """
    soup = get_page(house_url)
    price = soup.find('span', class_='total').text
    unit = soup.find('span', class_='unit').text.strip()

    # The first four <p> elements are read positionally. Each slice drops a
    # fixed-length leading prefix - presumably the field label; verify the
    # offsets against the live page markup.
    house_info = soup.find_all('p')
    area = house_info[0].text[3:]
    layout = house_info[1].text[5:]
    floor = house_info[2].text[3:]
    direction = house_info[3].text[5:]

    # Present the scraped values as a dictionary.
    info = {
        '价格': price,
        '单位': unit,
        '面积': area,
        '户型': layout,
        '楼层': floor,
        '朝向': direction,
    }
    return info
# Connection settings; get_db unpacks them as keyword arguments to
# pymysql.Connect.
# NOTE(review): the credentials are hard-coded in source - consider loading
# them from the environment or from a config file kept out of version control.
DATABASE = {
    'host': '192.168.43.47',  # for a remote database: the remote server's IP address
    'database': 'Lianjia',
    'user': 'root',
    'password': 'Lk123123',
}
def get_db(setting):
    """Open a MySQL connection from a settings mapping.

    Args:
        setting: Mapping whose items are passed as keyword arguments to
            ``pymysql.Connect`` (for example host, database, user, password).

    Returns:
        The connection object returned by ``pymysql.Connect``.
    """
    return pymysql.Connect(**setting)
def insert(db, house_info):
    """Insert one scraped listing into the ``house`` table and commit.

    The ``house`` table must already exist in the connected database;
    otherwise MySQL rejects the statement with error 1146 ("Table ...
    doesn't exist").

    Args:
        db: An open database connection providing ``cursor()`` and
            ``commit()``.
        house_info: Dict with the keys '价格', '单位', '面积', '户型',
            '楼层' and '朝向', as produced by get_house_info.

    Raises:
        KeyError: If ``house_info`` lacks one of the expected keys.
    """
    # Read from the house_info argument, not from a module-level global.
    params = (
        house_info['价格'],
        house_info['单位'],
        house_info['面积'],
        house_info['户型'],
        house_info['楼层'],
        house_info['朝向'],
    )
    # Placeholders let the driver escape the values, so scraped text that
    # contains quotes can neither break nor inject into the statement.
    sql = (
        "insert into house(price,unit,area,layout,floor,direction) "
        "values(%s,%s,%s,%s,%s,%s)"
    )
    print(sql)
    # The with-block closes the cursor even if execute() raises.
    with db.cursor() as cursor:
        cursor.execute(sql, params)
    db.commit()
# Scrape one listing and store it in the database.
house = get_house_info('https://bj.lianjia.com/zufang/101103057796.html')
db = get_db(DATABASE)
try:
    insert(db, house)
finally:
    # Release the connection even when the insert fails.
    db.close()
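# Running the script produces the following error; it means the `house` table
# has not been created in the target database yet:
# ProgrammingError: (1146, "Table 'lianjia.house' doesn't exist")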