Python 获取 CSDN 课程基本信息

目标数据:

python获取csdn课程基本信息_第1张图片

代码:

import requests
from lxml import etree

# Scrape target: https://www.csdn.net/gather_4a/NtDakg1sOC1lZHUO0O0O.html?page=1
# Fields collected for each course: title, price, instructor.

# Seconds to wait on connect/read for each page. Without an explicit timeout
# requests.get() may block forever on a stalled connection.
REQUEST_TIMEOUT = 10

# Listing pages 1..3 of the course index.
url = [
    "https://www.csdn.net/gather_4a/NtDakg1sOC1lZHUO0O0O.html?page={}".format(j)
    for j in range(1, 4)
]

print(len(url))
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36'}

# Accumulates one dict per course across all pages. Each value is the raw
# list returned by xpath() (possibly empty), not a single string.
return_Date = []
for urls in url:
    try:
        web_Source_Code = requests.get(
            urls, headers=headers, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as exc:
        # One unreachable page should not discard the pages already scraped.
        print('request failed:', urls, exc)
        continue

    print(web_Source_Code.status_code)

    # Do not try to parse error pages or empty bodies; they cannot contain
    # course items and an empty document is not parseable as HTML.
    if not web_Source_Code.ok or not web_Source_Code.text.strip():
        continue

    html = etree.HTML(web_Source_Code.text)
    if html is None:
        continue

    # NOTE: the trailing space inside the class value is intentional; the
    # predicate is an exact string match against the page's class attribute.
    block_1 = html.xpath('//div[@class="course_item "]')

    print('找到目标元素:', len(block_1))

    for block_2 in block_1:
        # All three queries are relative to the current course container.
        course_Name = block_2.xpath('div[@class="course_title"]/a/text()')
        price_Of_Course = block_2.xpath(
            'div[@class="course_lecturer"]/span/text()')
        the_Course_Instructor = block_2.xpath(
            'div[@class="course_lecturer"]/a/span/text()')
        return_Date.append({
            "course_Name": course_Name,
            "price_Of_Course": price_Of_Course,
            "the_Course_Instructor": the_Course_Instructor,
        })


# Dump every collected record, one dict per line.
for date_s in return_Date:
    print(date_s)

 

输出截图:

python获取csdn课程基本信息_第2张图片

你可能感兴趣的:(爬虫,python,csdn)