# 2023年美赛Y题帆船数据补充,py源码(部分)。

from time import sleep

import requests
import re
from bs4 import BeautifulSoup


def get_response(html_url, timeout=10):
    """Download a page with HTTP GET and return its body as text.

    Args:
        html_url: Address of the page to request.
        timeout: Seconds to wait for the server before giving up. requests
            applies no timeout unless one is passed, so a stalled
            connection would otherwise block the crawl indefinitely.

    Returns:
        The response body as ``str`` (``Response.text``).

    Raises:
        requests.exceptions.Timeout: If the server does not answer within
            ``timeout`` seconds.
    """
    # Browser-like user agent sent with every request
    # (presumably so the site serves the regular HTML page -- not verified).
    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36 Edg/111.0.1661.54'
    }
    response = requests.get(url=html_url, headers=headers, timeout=timeout)
    return response.text

def get_url(html_url):
    """Collect the link target of the first anchor in each table row.

    The page at ``html_url`` is downloaded with ``get_response`` and the
    rows of its first ``<tbody>`` are scanned in document order.

    Args:
        html_url: Address of the page that contains the table.

    Returns:
        A list with the ``href`` value of the first ``<a>`` of every row
        that has one (``None`` when that anchor carries no ``href``).
        Empty when the page has no ``<tbody>``.
    """
    page_text = get_response(html_url)
    soup = BeautifulSoup(page_text, 'html.parser')
    tbody = soup.find('tbody')
    if tbody is None:
        # No table on the page: nothing to collect.
        return []

    hrefs = []
    for row in tbody.find_all('tr'):
        anchor = row.find('a')
        if anchor is None:
            # Rows without a link are skipped; get_url1 skips the same rows
            # so the two result lists stay aligned.
            continue
        hrefs.append(anchor.get('href'))
    return hrefs

def get_url1(html_url):
    """Collect the link text of the first anchor in each table row.

    The page at ``html_url`` is downloaded with ``get_response`` and the
    rows of its first ``<tbody>`` are scanned in document order.

    Args:
        html_url: Address of the page that contains the table.

    Returns:
        A list with the whitespace-stripped text of the first ``<a>`` of
        every row that has one. Empty when the page has no ``<tbody>``.
    """
    page_text = get_response(html_url)
    soup = BeautifulSoup(page_text, 'html.parser')
    tbody = soup.find('tbody')
    if tbody is None:
        # No table on the page: nothing to collect.
        return []

    list_name = []
    for row in tbody.find_all('tr'):
        anchor = row.find('a')
        if anchor is None:
            # Rows without a link are skipped; get_url skips the same rows
            # so the two result lists stay aligned.
            continue
        list_name.append(anchor.get_text().strip())
    return list_name

def get_data(url):
    """Scrape six measurements from one detail page and print them.

    The page is downloaded with ``get_response``. Every ``<div>`` whose
    class attribute is exactly ``'sailboatdata-data col-xs-6 col-sm-3'`` is
    treated as a value cell, and the cells at positions 2..7 are read as
    LOA, LWL, Beam, SA, Draft_max and Draft_min, in that order.
    NOTE(review): this mapping is purely positional; confirm it against
    the live page layout.

    Args:
        url: Address of the detail page.

    Returns:
        A dict mapping each field name to its stripped cell text. A field
        whose cell is absent from the page maps to ``''``, so one
        incomplete page does not abort the whole crawl with an IndexError.
    """
    page_text = get_response(url)
    soup = BeautifulSoup(page_text, 'html.parser')
    cells = soup.find_all('div', class_='sailboatdata-data col-xs-6 col-sm-3')

    def cell_text(position):
        # Pages with fewer value cells than expected yield empty fields.
        if position < len(cells):
            return cells[position].get_text().strip()
        return ''

    field_names = ('LOA', 'LWL', 'Beam', 'SA', 'Draft_max', 'Draft_min')
    # The first field sits at index 2; cells 0 and 1 are not used here.
    data = {
        name: cell_text(position)
        for position, name in enumerate(field_names, start=2)
    }
    print("\n".join(name + ":" + value for name, value in data.items()))
    return data

if __name__ == '__main__':
    # Crawl the index page, then print the measurements of every listed boat.
    url = 'https://sailboatdata.com/'
    boat_urls = get_url(url)
    boat_names = get_url1(url)
    print(boat_urls)
    print(boat_names)
    # The two lists are built from the same table rows, so they are walked
    # in lockstep instead of through a hand-maintained index.
    for boat_name, data_url in zip(boat_names, boat_urls):
        print(boat_name)
        # Example detail URL:
        # https://sailboatdata.com/sailboat/12-square-meter-sharpie
        get_data(data_url)

# 你可能感兴趣的:(python,beautifulsoup)