from time import sleep
import requests
import re
from bs4 import BeautifulSoup
def get_response(html_url):
    """Fetch *html_url* and return the response body as text.

    Sends a desktop-browser User-Agent so the site serves the normal
    page rather than a bot-blocked one.

    Raises:
        requests.Timeout: if the server does not respond within 10 s
            (the original call had no timeout and could hang forever).
        requests.HTTPError: on a 4xx/5xx status, instead of silently
            returning an error page for the callers to parse.
    """
    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36 Edg/111.0.1661.54'
    }
    response = requests.get(url=html_url, headers=headers, timeout=10)
    response.raise_for_status()  # fail fast rather than scraping an error page
    return response.text
def get_url(html_url):
    """Return the href of the first <a> inside each <tbody> row of *html_url*.

    Assumes the page has a <tbody> whose rows each contain an anchor;
    if either is missing this raises AttributeError, matching the
    original behavior.
    """
    page_text = get_response(html_url)
    soup = BeautifulSoup(page_text, 'html.parser')
    tbody = soup.find('tbody')
    rows = tbody.find_all('tr')
    # comprehension replaces the append loop; the accumulator was named
    # 'list', which shadowed the builtin
    urls = [row.find('a').get('href') for row in rows]
    return urls
def get_url1(html_url):
    """Return the stripped link text of the first <a> in each <tbody> row.

    Mirrors get_url() — same page walk, but collects the anchor text
    (boat names) instead of the hrefs.  NOTE(review): each of the two
    helpers re-fetches the page; callers hitting both pay two requests.
    """
    page_text = get_response(html_url)
    soup = BeautifulSoup(page_text, 'html.parser')
    tbody = soup.find('tbody')
    rows = tbody.find_all('tr')
    names = [row.find('a').get_text().strip() for row in rows]
    return names
def get_data(url):
    """Scrape a sailboat detail page at *url* and print its key dimensions.

    Reads the spec cells by fixed position (indices 2-7 of the matched
    divs) — assumes the site's layout is stable; TODO confirm against
    the live page.  Raises IndexError if fewer cells are present,
    matching the original behavior.
    """
    page_text = get_response(url)
    soup = BeautifulSoup(page_text, 'html.parser')
    cells = soup.find_all('div', class_='sailboatdata-data col-xs-6 col-sm-3')
    # (label, cell index) table replaces six copy-pasted assignments
    fields = [
        ('LOA', 2),
        ('LWL', 3),
        ('Beam', 4),
        ('SA', 5),
        ('Draft_max', 6),
        ('Draft_min', 7),
    ]
    # produces exactly the original output: "LOA:...\nLWL:...\n..."
    print("\n".join(label + ":" + cells[idx].get_text().strip()
                    for label, idx in fields))
if __name__ == '__main__':
    url = 'https://sailboatdata.com/'
    # renamed from 'list'/'list_name' — 'list' shadowed the builtin
    detail_urls = get_url(url)
    boat_names = get_url1(url)
    print(detail_urls)
    print(boat_names)
    # zip pairs each name with its detail URL, replacing the manual
    # index counter i
    for name, data_url in zip(boat_names, detail_urls):
        print(name)
        get_data(data_url)
        # rate-limit between requests; 'sleep' was imported at the top
        # of the file but never used — presumably this was the intent
        sleep(1)