Python操作数据库、Python爬虫笔记

PYTHON

import MySQLdb
# Connection settings for a local MySQL server.
# NOTE(review): the password is hard-coded in source — move it to an
# environment variable or config file before publishing this code.
DATABASE = {
    'host': '127.0.0.1',
    'database': 'test',
    'user': 'root',
    'password': 'zs@309337',
    # utf8mb4 = full 4-byte UTF-8, required for emoji and rare CJK chars
    'charset': 'utf8mb4'
}
# ** unpacks the dict into keyword arguments for connect().
db = MySQLdb.connect(**DATABASE)
# Bare expression: in a REPL this echoes the connection object.
db
<_mysql.connection open to '127.0.0.1' at 0000029E6281E478>

游标

# Obtain a cursor from the open connection and dump every row of the
# `class` table.  fetchall() returns a tuple of row-tuples.
cursor = db.cursor()
sql = "select*from class"  # MySQL tolerates the missing spaces around '*'
cursor.execute(sql)
results = cursor.fetchall()
# Each row prints as a tuple, e.g. (1, '高三一班').
for row in results:
    print(row)
(1, '高三一班')
(2, '高三二班')

查询

# Filtered query: `where id <=1` matches a single row here, so `limit 3`
# has no visible effect — it merely caps the result set.
sql = "select * from class where id <=1 limit 3"
cursor.execute(sql)
results = cursor.fetchall()
for row in results:
    print(row)
(1, '高三一班')

插入

# Insert one row into `class`.  Use a parameterized query (%s placeholder)
# so the driver escapes the value, instead of splicing it into the SQL
# string — the safe habit even when the value is a literal.
sql = "INSERT INTO class (name) VALUES (%s)"
cursor = db.cursor()
cursor.execute(sql, ('Wilson',))
# INSERT/UPDATE/DELETE are transactional: commit() makes the change durable.
db.commit()

删除

delete

更新

update

捕捉异常

# Demonstrate catching a specific exception type: adding an int to a str
# is undefined in Python and raises TypeError, which we intercept and print.
try:
    a = 10           # runs fine
    b = 1 + 'hello'  # raises TypeError before b is ever bound
except TypeError as err:
    print(err)
unsupported operand type(s) for +: 'int' and 'str'

爬虫

python 库

安装

pip install requests

pip install bs4

import requests
from bs4 import BeautifulSoup

# Fetch the Lianjia second-hand-listings index page and collect the
# detail-page links.
# NOTE(review): 'noresultRecommend' is the anchor class the site used at
# the time of writing — confirm it still matches the live markup.
url = "https://bj.lianjia.com/ershoufang/"
response = requests.get(url, timeout=10)  # never hang forever on a dead host
# Name the parser explicitly: omitting it triggers GuessedAtParserWarning
# and can select different parsers on different machines.
soup = BeautifulSoup(response.text, 'html.parser')
links_div = soup.find_all('a', class_='noresultRecommend')
links = [a.get('href') for a in links_div]
# Bare expression: in a REPL this echoes the list and its length.
links, len(links)

def get_links(url):
    """Return the detail-page hrefs found on a Lianjia listing page.

    Parameters
    ----------
    url : str
        Listing-page URL to scrape.

    Returns
    -------
    list
        href of every ``<a class="noresultRecommend">`` anchor on the page
        (``None`` entries possible if an anchor lacks an href).
    """
    # timeout prevents the request from blocking indefinitely.
    response = requests.get(url, timeout=10)
    # Explicit parser avoids bs4's GuessedAtParserWarning and keeps parsing
    # consistent across environments.
    soup = BeautifulSoup(response.text, 'html.parser')
    anchors = soup.find_all('a', class_='noresultRecommend')
    return [a.get('href') for a in anchors]


url = "https://bj.lianjia.com/ershoufang/"
get_links(url)
['https://bj.lianjia.com/ershoufang/101107519000.html',
 'https://bj.lianjia.com/ershoufang/101101456992.html',
 'https://bj.lianjia.com/ershoufang/101102299892.html',
 'https://bj.lianjia.com/ershoufang/101102922897.html',
 'https://bj.lianjia.com/ershoufang/101102990382.html',
 'https://bj.lianjia.com/ershoufang/101103129496.html',
 'https://bj.lianjia.com/ershoufang/101103370958.html',
 'https://bj.lianjia.com/ershoufang/101103659221.html',
 'https://bj.lianjia.com/ershoufang/101103893958.html',
 'https://bj.lianjia.com/ershoufang/101104092018.html',
 'https://bj.lianjia.com/ershoufang/101104683726.html',
 'https://bj.lianjia.com/ershoufang/101104133236.html',
 'https://bj.lianjia.com/ershoufang/101102833009.html',
 'https://bj.lianjia.com/ershoufang/101102442399.html',
 'https://bj.lianjia.com/ershoufang/101103259666.html',
 'https://bj.lianjia.com/ershoufang/101103274797.html',
 'https://bj.lianjia.com/ershoufang/101103603972.html',
 'https://bj.lianjia.com/ershoufang/101104156284.html',
 'https://bj.lianjia.com/ershoufang/101104305244.html',
 'https://bj.lianjia.com/ershoufang/101105405296.html',
 'https://bj.lianjia.com/ershoufang/101103598352.html',
 'https://bj.lianjia.com/ershoufang/101104849813.html',
 'https://bj.lianjia.com/ershoufang/101105130005.html',
 'https://bj.lianjia.com/ershoufang/101105586990.html',
 'https://bj.lianjia.com/ershoufang/101103518101.html',
 'https://bj.lianjia.com/ershoufang/101105994952.html',
 'https://bj.lianjia.com/ershoufang/101103434042.html',
 'https://bj.lianjia.com/ershoufang/101103834871.html',
 'https://bj.lianjia.com/ershoufang/101105222136.html',
 'https://bj.lianjia.com/ershoufang/101104319814.html']

优化代码

import requests
from bs4 import BeautifulSoup


def get_page(url):
    """Download *url* and return it parsed as a BeautifulSoup tree."""
    # timeout keeps a dead host from blocking the whole script.
    response = requests.get(url, timeout=10)
    # Explicit parser: omitting it triggers GuessedAtParserWarning and may
    # pick different parsers on different machines.
    return BeautifulSoup(response.text, 'html.parser')


def get_links(link_url):
    """Return hrefs of all ``<a class="noresultRecommend">`` anchors on a page."""
    soup = get_page(link_url)
    anchors = soup.find_all('a', class_='noresultRecommend')
    return [a.get('href') for a in anchors]


house_url = "https://bj.lianjia.com/ershoufang/101107519000.html"
soup = get_page(house_url)
# NOTE(review): find() returns None when the class is absent, which would
# raise AttributeError on `.text` — the listing markup may have changed.
price = soup.find('span', class_="total").text      # total price, e.g. '500'
unit = soup.find('span', class_="unitPriceValue")   # per-square-metre node
# The nested <i> holds the unit label, e.g. '元/平米'.
unit.i.text
'元/平米'





你可能感兴趣的:(深度学习,python,mysql)