Python 爬取电影天堂网各个电影下载地址

# -*- coding:utf-8 -*-

import sys

import requests
from lxml import etree
# Site root; relative links found on the pages are resolved against it.
BASE_URL = "https://www.dy2018.com"
# First listing page; its pager drop-down enumerates every listing page.
INDEX_URL = BASE_URL + "/html/gndy/dyzz/index.html"
# Seconds to wait on the server. requests applies no timeout by default,
# so without this a stalled connection would block the crawl forever.
REQUEST_TIMEOUT = 10


def _fetch_tree(session, url):
    """Download *url* and return it parsed as an lxml HTML element tree.

    The raw bytes (``response.content``) are given to lxml instead of
    ``response.text``: ``text`` is decoded with the charset requests guesses
    from the HTTP headers, which can be wrong for pages that only declare
    their encoding inside the HTML.  (The alternative fix is to set
    ``response.encoding = response.apparent_encoding`` before using ``text``.)

    Raises:
        requests.RequestException: on connection failure, timeout, or an
            HTTP error status (via ``raise_for_status``).
    """
    response = session.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return etree.HTML(response.content)


def _absolute(link):
    """Return *link* as-is if it already starts with "http", else prefix BASE_URL."""
    if link.startswith("http"):
        return link
    return BASE_URL + link


# One session for the whole crawl so connections to the site are reused.
with requests.Session() as session:
    # A failure here is left to propagate: without the index page there is
    # nothing to crawl.
    index_root = _fetch_tree(session, INDEX_URL)
    # Each <option> value of the pager <select> is the path of a listing page.
    page_paths = index_root.xpath("//select[@name='select']/option/@value")

    for page_path in page_paths:
        page_url = _absolute(page_path)
        try:
            page_root = _fetch_tree(session, page_url)
        except requests.RequestException as exc:
            # One bad listing page should not abort the remaining pages.
            sys.stderr.write("skipping listing page %s: %s\n" % (page_url, exc))
            continue

        # Anchors in the listing rows; each one links to a movie detail page.
        for anchor in page_root.xpath("//td[@height='26']/b/a"):
            titles = anchor.xpath("text()")
            hrefs = anchor.xpath("@href")
            # Ignore anchors that lack either a visible title or a target.
            if not titles or not hrefs:
                continue

            movie_url = _absolute(hrefs[0])
            try:
                movie_root = _fetch_tree(session, movie_url)
            except requests.RequestException as exc:
                sys.stderr.write("skipping movie page %s: %s\n" % (movie_url, exc))
                continue

            # XPath note: "/text()" yields only an element's own text nodes,
            # while "//text()" yields the text of the element and all of its
            # descendants.  The descendant form is used here so text nested
            # inside child tags of the highlighted cell is collected too
            # (presumably the download links -- verify against the live page).
            movie_links = movie_root.xpath("//td[@bgcolor='#fdfddf']//text()")
            print(movie_links)
            # Listing page this movie was found on.
            print(page_url)


你可能感兴趣的:(python)