俗话说:钻石恒久远,一颗永流传
先说一下背景,楼主是某小型互联网公司的数据分析师,因为以前有编程的基础,所以略懂一点爬虫相关的技术。由于到了结婚的年龄,所以找到政府领取了女朋友一枚,并且把婚期定在了今年的圣诞节。婚期有了,怎么能没有婚礼必备的东西——钻石呢?
先是在各种周大福、周六福、周福福看,后来又在某东、某宝、某官网看,翻遍了网上各种达人教你如何选择钻戒的文章、视频。
最终总结了一下,钻石主要看以下几点:
价格 克拉数 颜色 净度 切工 荧光
由于加工好的成品钻价格实在是太高了,所以就把主意打到了"裸钻"身上,去了大罗塘一类的珠宝批发市场,热了一身的汗,但是还是感觉没有合适的
无意间发现一个神仙网站就是这个bluenile
同样是50分、H颜色、VS1净度的钻石,在这里买裸钻居然还不到1万块
啰啰嗦嗦,说了这么多,现在上代码
# 下面的代码是爬取并写入到CSV文件中
import csv
import json
import re
import time

import pandas as pd
import requests
headers = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Safari/537.36",
"Cookie": "browserCheck=ver~1&browserCheck~true; holiday=ver~1&hasAdjustedHolidayShipping~false; GUID=3AC0A668_AF4E_4021_88C3_172963437EAA; bld=ver~9&cl~0.0; wishlist=ver~2&wishList~0; migrationstatus=ver~2&redirected~false; bncust=ver~2&SignInURL~https://secure.bluenile.com/cn/account/settings&CustName~chuo&CID~12694412-2-2834999531; pop=ver~4&belpop~false&china~true&emailpop~false&french~false&ie~false&internationalSelect~false&iphoneApp~false&s100off~false&survey~false&uae~false&webroompop~false; dsearch=ver~9&visible~800000000&newUser~true&shownHolidayShipping~&state~RD---------------------0------0,null,null-default-asc-CNY-------; bnsetsearch=ver~3&filters~ring_style|Solitaire|Ring+Style&id~BYOR+Setting+Search&sort~BS; devconfig=ver~4&debugmode~false&force_serve_awesome~false&force_serve_local_chat~false&force_serve_local_yotpo~false&force_serve_non_awesome~false&force_serve_non_local_chat~false&force_serve_non_local_yotpo~false&force_serve_non_pre_launch~false&force_serve_non_solr~false&force_serve_pre_launch~false&force_serve_solr~false; sitetrack=ver~3&jse~1; locale=ver~2&country~CHN¤cy~CNY&language~zh-Hans&productSet~BNCN; IR_gbd=bluenile.com; bnper=ver~7&NIB~1&DM~-&EMAIL~dN6dTZp3qzrDPmJsihd9zydr4ZadzVcO&EU_AGREED~false&GUID~3AC0A668_AF4E_4021_88C3_172963437EAA&LV~2022-06-24T21:13:47.178-07:00&PRE-MOBILE~Web&NOT_SELL~false&PAGING~-1&SESS-CT~4&STC~3671KW&FB_MINI~false&SUB~true; bnses=ver~4&ats~20220624+21:13&cdclosed~false&filterTooltipClosed~false&ace~false&quickshipseen~false&fbcs~false&imeu~false&nu~false&ss~0&legal_notice_eu_closed~false&mbpop~false&sswpu~false&exitpu~false&spvc~4&deo~false&nogtm~false; device=ver~2&orientation~Landscape&resolution~975x809&device_type~Desktop; bn_uuid-ssn=0ILW2XgXHMktI33B0CaF86Q3sONToYu7yrsxfZk7N8gNSZHylpZCJ2dvtu6MYCzuGVp3KVPop0hwQc7gfjrcf0rweQ8kmDzYUsIQatyy2klUBvDmb9O4O7KsARhEA8uXfsw3ecRVdq2i8DGGjrfiH5BO4; 
bn_uuid=0ILW2XgXHMktI33B0CaF86Q3sONToYu7yrsxfZk7N8gNSZHylpZCJ2dvtu6MYCzuGVp3KVPop0hwQc7gfjrcf0rweQ8kmDzYUsIQatyy2klUBvDmb9O4O7KsARhEA8uXfsw3ecRVdq2i8DGGjrfiH5BO4; IR_9430=1656130432573|0|1656130432573||; IR_PI=79e3d7ad-1005-38d9-8862-07b2bc3d62bc|1656216832573",
"Accept-Language": "zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7",
"Host": "www.bluenile.com",
"Referer": "https://www.bluenile.com/cn/diamond-search",
"sec-ch-ua": '" Not A;Brand";v="99", "Chromium";v="102", "Google Chrome";v="102"',
"sec-ch-ua-mobile": "?0",
"sec-ch-ua-platform": "Windows",
"Sec-Fetch-Dest": "empty",
"Sec-Fetch-Mode": 'cors',
"Sec-Fetch-Site": "same-origin",
"x-bn-pageid": "Diamond Search"
}
# Number of results requested per search call.
PAGE_SIZE = 500
SEARCH_URL = 'https://www.bluenile.com/api/public/diamond-search-grid/v2?'


def build_search_params(min_price):
    """Return the form fields for one diamond-search request.

    Results are requested sorted by ascending price, so passing the highest
    price seen so far as *min_price* asks for the next slice of the listing.
    """
    return {
        "startIndex": 0,
        "pageSize": PAGE_SIZE,
        # Value changes on every call.
        # NOTE(review): presumably a cache-busting timestamp - confirm the
        # scale the site expects.
        "_": time.time() * 10,
        "unlimitedPaging": "false",
        "sortDirection": "asc",
        "sortColumn": "price",
        "shape": "RD",
        "minPrice": min_price,
        "minCarat": 0.49,
        "maxCarat": 0.79,
        "minCut": "Ideal",
        "maxCut": "Astor Ideal",
        "minColor": "I",
        "maxColor": "D",
        "minClarity": "SI1",
        "maxClarity": "FL",
        "minFluorescence": "Faint",
        "maxFluorescence": None,
        "maxDateType": "MANUFACTURING_NOT_REQUIRED",
        "isQuickShip": "false",
        "bestValueSort": "false",
        "hasVisualization": "false",
        "isFiltersExpanded": "true",
        "astorFilterActive": "false",
        "country": "CHN",
        "language": "zh-Hans",
        "currency": "CNY",
        "productSet": "BNCN",
    }


def fetch_diamond_page(min_price, timeout=30):
    """POST one search request and return the ``results`` list of its JSON.

    Raises ``requests.HTTPError`` for an error status instead of trying to
    decode an error page as JSON, and gives up after *timeout* seconds
    rather than hanging forever.
    """
    response = requests.post(
        SEARCH_URL,
        headers=headers,
        data=build_search_params(min_price),
        timeout=timeout,
    )
    response.raise_for_status()
    return response.json()["results"]


def parse_diamond(diamond):
    """Flatten one search result into the fields this script reports."""
    return {
        "sku": diamond['skus'][0],
        "girdle": diamond['girdle'][0],
        # Drop the first 4 characters in front of the amount
        # (presumably the currency prefix - verify against a live response).
        "price": diamond['price'][0][4:],
        "color": diamond['color'][0],
        "fluorescence": diamond['fluorescence'][0],
        "carat": diamond['carat'][0],
        "clarity": diamond['clarity'][0],
        "cut": diamond['cut'][0]['label'],
    }


def scrape_diamonds(start_price='11416.39', max_pages=1000, fetch_page=None,
                    pause=10, csv_path='bluenile.csv'):
    """Walk the price-sorted listing and append matching diamonds to a CSV.

    Each page is fetched with the highest price seen so far as the minimum
    price. Only diamonds whose girdle text ends with ``Faceted`` are printed
    and written.

    Args:
        start_price: minimum price for the first request.
        max_pages: upper bound on the number of requests.
        fetch_page: callable ``fetch_page(min_price) -> list`` of raw result
            dicts; defaults to :func:`fetch_diamond_page`.
        pause: seconds to sleep between requests.
        csv_path: file the rows are appended to.

    Returns:
        The number of rows written.
    """
    if fetch_page is None:
        fetch_page = fetch_diamond_page

    end_price = start_price
    count_num = 0
    # The next page starts at the last seen price, so diamonds at that price
    # come back again; remember SKUs to skip the repeats.
    seen_skus = set()

    # Open the file once; newline='' is what the csv module expects.
    with open(csv_path, 'a', encoding='utf-8-sig', newline='') as fp:
        # csv.writer quotes fields such as "11,418.65" so the embedded comma
        # does not shift the columns.
        writer = csv.writer(fp)
        for _ in range(max_pages):
            diamond_list = fetch_page(end_price)
            new_on_page = 0
            for diamond in diamond_list:
                item = parse_diamond(diamond)
                if item["sku"] in seen_skus:
                    continue
                seen_skus.add(item["sku"])
                new_on_page += 1
                end_price = item["price"].replace(',', '')

                # NOTE(review): the original listing lost its indentation;
                # the report and the CSV row are assumed to belong to the
                # Faceted branch.
                if not item["girdle"].endswith('Faceted'):
                    continue
                count_num += 1
                print("----------------------------",
                      "*****第{}颗*****".format(count_num),
                      item["sku"],
                      "克拉数:{}".format(item["carat"]),
                      "颜色:{}".format(item["color"]),
                      "价格:{}".format(item["price"]),
                      "荧光:{}".format(item["fluorescence"]),
                      "净度:{}".format(item["clarity"]),
                      "切工:{}".format(item["cut"]),
                      "----------------------------",
                      sep='\n')
                writer.writerow([
                    count_num, item["sku"], item["carat"], item["color"],
                    item["price"], item["fluorescence"], item["clarity"],
                    item["cut"],
                ])

            # An empty page, or one made only of repeats, means the listing
            # is exhausted; stop instead of re-requesting the same page.
            if not new_on_page:
                break
            fp.flush()
            print('已扫完{}颗,现在最高价格为:{}'.format(len(seen_skus), end_price))
            time.sleep(pause)
    return count_num


if __name__ == "__main__":
    scrape_diamonds()
输出结果如下所示:
… … … … … … … … … … … …
第1颗
LD16821489
克拉数:0.49
颜色:H
价格:11,418.65
荧光:无
净度:SI1
切工:理想
… … … … … … … … … … … …
已扫完500颗,现在最高价格为:11567.81
通过Excel选出喜欢的并进行标记,再将标记的编号写进代码,把对应的GIA证书下载下来,逐个对比GIA证书上的参数
import requests
import io
# SKUs picked by hand from the scraped spreadsheet.
li = ["LD16035818", "LD16887180", "LD17615365", "LD16451860", "LD18785995", "LD15844901", "LD18483769", "LD15517006"]

# The request headers are the same for every certificate, so build them once.
send_headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36",
    "Connection": "keep-alive",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
    "Accept-Language": "zh-CN,zh;q=0.8",
}


def download_gia_cert(sku, get=None, timeout=30):
    """Download the GIA certificate for *sku* and save it as ``<sku>.PDF``.

    Args:
        sku: diamond SKU used both in the certificate URL and the file name.
        get: callable with the ``requests.get`` signature; defaults to
            ``requests.get``.
        timeout: seconds to wait for the server before giving up.

    Returns:
        The name of the file that was written (in the current directory).

    Raises:
        requests.HTTPError: the server answered with an error status; in
            that case nothing is written to disk.
    """
    if get is None:
        get = requests.get
    pdf_url = f'https://bnsec.bluenile.com/bnsecure/certs/{sku}/GIA?country=CHN&language=zh-Hans'
    response = get(pdf_url, headers=send_headers, timeout=timeout)
    # Fail before touching the disk so an error page is never saved as a PDF.
    response.raise_for_status()
    filename = f"{sku}.PDF"
    with open(filename, mode='wb') as f:
        f.write(response.content)
    return filename


if __name__ == "__main__":
    for b in li:
        try:
            download_gia_cert(b)
        except (requests.RequestException, OSError) as exc:
            # Report the failure and keep going with the remaining SKUs.
            print(f'编码{b}_GIA证书.PDF,下载失败:{exc}')
        else:
            print(f'编码{b}_GIA证书.PDF,下载成功!')
输出结果如下所示:
编码LD16035818_GIA证书.PDF,下载成功!
编码LD16887180_GIA证书.PDF,下载成功!
编码LD17615365_GIA证书.PDF,下载成功!