Python 爬虫

Python 爬虫

urllib

# 网络请求
from urllib.request import urlopen
# 下载
from urllib.request import urlretrieve
# HTTP 错误
from urllib.error import HTTPError, URLError
# URL 解析
from urllib.parse import urlparse

BeautifulSoup

from bs4 import BeautifulSoup as bs

findAll(tag, attributes, recursive, text, limit, keywords)
find(tag, attributes, recursive, text, keywords)

myImgTag.attrs["src"]

re

import re

re.compile()

datetime

import datetime

random

import random
# Seed the generator from the current time. random.seed() only accepts
# None, int, float, str, bytes or bytearray; passing a datetime object
# raises TypeError on Python 3.11+, so seed with the numeric timestamp.
random.seed(datetime.datetime.now().timestamp())
# Pick a random valid index into `links`; randint includes both endpoints,
# hence the -1. NOTE(review): `links` is not defined in this snippet --
# presumably a non-empty list of collected URLs; confirm against the caller.
random.randint(0, len(links)-1)

json

import json
from urllib.request import urlopen
def getCountry(ipAddress):
    """Return the country code reported by the freegeoip JSON API for an IP.

    Args:
        ipAddress: The IP address to look up, as a string. It is appended
            verbatim to the service URL.

    Returns:
        The value of the ``country_code`` key in the decoded JSON response,
        or ``None`` when the response has no such key.
    """
    # The context manager closes the HTTP response once the body is read.
    with urlopen("http://freegeoip.net/json/" + ipAddress) as conn:
        response = conn.read().decode('utf-8')
    responseJson = json.loads(response)
    return responseJson.get("country_code")


import json

# A JSON document holding two arrays of objects. Adjacent string literals
# inside the parentheses are concatenated into one string, which keeps the
# long document readable without an unterminated single-quoted literal.
jsonString = (
    '{"arrayOfNums":[{"number":0},{"number":1},{"number":2}],'
    '"arrayOfFruits":[{"fruit":"apple"},{"fruit":"banana"},'
    '{"fruit":"pear"}]}'
)
# json.loads turns JSON objects into dicts and JSON arrays into lists.
jsonObj = json.loads(jsonString)

# The whole list stored under "arrayOfNums".
print(jsonObj.get("arrayOfNums"))
# The second element of that list (a dict).
print(jsonObj.get("arrayOfNums")[1])
# Sum of the "number" values of the second and third elements.
print(jsonObj.get("arrayOfNums")[1].get("number")
      + jsonObj.get("arrayOfNums")[2].get("number"))
# The "fruit" value of the third element of "arrayOfFruits".
print(jsonObj.get("arrayOfFruits")[2].get("fruit"))
 输出的结果是:
     [{'number': 0}, {'number': 1}, {'number': 2}]
     {'number': 1}
     3
     pear

你可能感兴趣的:(Python 爬虫)