# 爬虫三——异常处理

'''
Exception handling
    While a program is running, errors that were not anticipated may occur.

    Catching an exception
    try:
        code that may raise an exception
    except SomeError:
        code that handles the exception
'''

# Minimal demonstration: dividing by zero raises ZeroDivisionError.
a = 10
b = 0
try:
    # Keep only the statement that can actually fail inside the try body.
    c = a / b
except ZeroDivisionError:
    # Catch just the error we expect; a bare `except:` would also swallow
    # KeyboardInterrupt, SystemExit and unrelated programming mistakes.
    print('除数不能是0')
else:
    # Runs only when the division succeeded.
    print('a / b = ' + str(c))

# Reached in both cases, showing that the program keeps running.
print('结束')

# 抓取京东:加入异常处理

import time
import traceback

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait

def _scrape_current_page(browser, wait):
    """Parse the result page that is currently displayed.

    Args:
        browser: Selenium WebDriver showing a search-result page.
        wait: WebDriverWait bound to ``browser``.

    Returns:
        A ``(page_goods, js_next)`` tuple. ``page_goods`` is a list of
        ``{'price': ..., 'name': ...}`` dicts, one per ``gl-item`` element.
        ``js_next`` is the ``onclick`` attribute of the ``pn-next`` element;
        it is falsy when that element has no ``onclick``.

    Raises:
        Whatever Selenium raises while waiting for or locating elements, and
        IndexError when the list container holds no ``gl-item`` element.
    """
    goods_list = wait.until(EC.presence_of_element_located((By.ID, 'J_goodsList')))
    items = goods_list.find_elements(By.CLASS_NAME, 'gl-item')

    # Run a snippet of JS in the browser that scrolls the last item into view
    # (presumably so the rest of the page gets rendered -- confirm), then
    # re-read the list after a short pause.
    browser.execute_script('arguments[0].scrollIntoView();', items[-1])
    time.sleep(1)
    goods_list = wait.until(EC.presence_of_element_located((By.ID, 'J_goodsList')))
    items = goods_list.find_elements(By.CLASS_NAME, 'gl-item')

    page_goods = [
        {
            'price': item.find_element(By.CSS_SELECTOR, '.p-price i').text,
            'name': item.find_element(By.CSS_SELECTOR, '.p-name em').text,
        }
        for item in items
    ]

    next_button = browser.find_element(By.CLASS_NAME, 'pn-next')
    return page_goods, next_button.get_attribute('onclick')


def get_goods_list(browser, max_errors=10):
    """Collect the goods of every result page, retrying when a page fails.

    Args:
        browser: Selenium WebDriver already showing the first result page.
        max_errors: Total number of failed attempts tolerated. The counter is
            cumulative over the whole run; it is not reset after a success.

    Returns:
        A list of ``{'price': ..., 'name': ...}`` dicts covering all pages,
        or an empty list once ``max_errors`` failures have occurred.
    """
    ls = []
    error_count = 0
    wait = WebDriverWait(browser, 10)

    while True:
        try:
            page_goods, js_next = _scrape_current_page(browser, wait)
            if js_next:
                # Execute the "next page" JS taken from the button's onclick.
                browser.execute_script(js_next)
        except Exception as e:
            # Best effort: report the failure, pause, then retry the page.
            print(repr(e))
            print(traceback.format_exc())

            time.sleep(1)
            error_count += 1

            if error_count >= max_errors:
                return []
            continue

        # Commit a page only after it was parsed completely and navigation
        # succeeded, so a retry never appends the same items twice.
        ls.extend(page_goods)

        # No onclick on the next button: this was the last page.
        if not js_next:
            break

    return ls


if __name__ == '__main__':
    # Script entry point: search JD for a user-supplied keyword and print
    # every product found on the result pages.
    browser = webdriver.Chrome()
    try:
        # Open the home page.
        browser.get('https://www.jd.com')

        # Ask the user for the keyword to search for.
        key = input('请输入一个关键字:')

        # Locate the element with id 'key' and type the keyword into it.
        input_markup = browser.find_element(By.ID, 'key')
        input_markup.send_keys(key)
        # Simulate pressing Enter, which opens the result page.
        input_markup.send_keys('\n')

        ls = get_goods_list(browser)

        # Total number of records collected.
        print(len(ls))
        # Dump the records.
        print(ls)

        # TODO: save the records to a database.
        ...
    finally:
        # Always shut the browser and its driver down, even when scraping
        # raised, so no Chrome process is left behind.
        browser.quit()


# 你可能感兴趣的:专业技能—Python