# 【爬虫基础】day01 学习get传参 (Crawler basics, day 01: GET request parameters)

"""
get传参 (passing GET parameters):
(1) 汉字报错: 解释器ascii没有汉字, 需要url汉字转码 (non-ASCII characters in a URL
    raise an error, so they must be percent-encoded):
    urllib.parse.quote(url, safe=string.printable)
(2) 字典传参 (passing a dict of parameters):
    urllib.parse.urlencode(dictionary)
"""
"""
post传参 (passing POST data):
urllib.request.urlopen(url, data="服务器接收的数据")
"""

import urllib.request
import urllib.parse
import string


def load_data():
    """Fetch the Baidu homepage via a plain GET request and save it locally.

    Demonstrates the str/bytes round trip: the response body arrives as
    bytes, is decoded to str with UTF-8, and is written to ``baidu.html``.
    """
    url = "http://www.baidu.com/"
    # Use the response as a context manager so the underlying HTTP
    # connection is always closed (the original leaked it until GC).
    with urllib.request.urlopen(url) as response:
        # response is the HTTP response object for the GET request.
        print(response)
        # Read the body; this is raw bytes, not str.
        data = response.read()
    print(data)
    # bytes -> str: decode with the page's encoding (UTF-8 here).
    str_data = data.decode("utf-8")
    print(str_data)
    # Persist the decoded page to disk.
    with open("baidu.html", "w", encoding="utf-8") as f:
        f.write(str_data)
    # str -> bytes: encode() is the inverse of decode().
    str_name = "baidu"
    byte_str = str_name.encode("utf-8")
    print(byte_str)

    # Crawled data is either str or bytes:
    # - str must be encode("utf-8")-ed before saving as binary;
    # - bytes must be decode("utf-8")-ed to get a str back.


# Example: fetch the Baidu results page for a (possibly non-ASCII) keyword.
def get_method_params(keyword="美女"):
    """Send a Baidu search GET request and save the result page to disk.

    Args:
        keyword: the search term appended to the ``wd=`` query parameter.
            Defaults to the original hard-coded "美女". Non-ASCII characters
            are percent-encoded before the request, because urllib only
            accepts ASCII URLs (otherwise it raises
            ``UnicodeEncodeError: 'ascii' codec can't encode characters``).
    """
    url = "http://www.baidu.com/s?wd="
    final_url = url + keyword
    print(final_url)
    # Percent-encode everything outside string.printable so the URL
    # is pure ASCII and urllib accepts it.
    encode_url = urllib.parse.quote(final_url, safe=string.printable)
    # Close the HTTP response deterministically via the context manager
    # (the original leaked the connection).
    with urllib.request.urlopen(encode_url) as response:
        print(response)
        # Read the raw bytes of the response body.
        data = response.read()
    # bytes -> str so it can be written as a text file.
    str_data = data.decode("utf-8")
    with open("百度_" + keyword + ".html", "w", encoding="utf-8") as f:
        f.write(str_data)


# Run the demos only when executed as a script, not when imported.
if __name__ == "__main__":
    get_method_params()
    load_data()

# 你可能感兴趣的: (【爬虫基础】day01 学习get传参)