Java天猫商品价格爬取

Java天猫商品价格爬取


现在各大电商平台都有反爬系统,天猫的相对复杂:不仅要找到返回价格的js请求,还要设置header,而且链接还很长!不过这么长的链接里真正有用的参数只有两个:一个id,一个skuId。

代码实现


<dependency>
	<groupId>org.apache.httpcomponents</groupId>
	<artifactId>httpclient</artifactId>
	<version>4.4</version>
</dependency>

/**
 * Fetches the price data for one Tmall item and prints the promotional price of one SKU.
 *
 * <p>The item id and SKU id are read from the query string of a hard-coded Tmall item link.
 * The item id is appended to the {@code initItemDetail.htm} URL, the response body is parsed
 * as JSON, and the price is read from
 * {@code defaultModel.itemPriceResultDO.priceInfo[skuId].promotionList[0].price}.
 *
 * <p>NOTE(review): the Referer and cookie values below were captured from a logged-in browser
 * session; they presumably expire and must be refreshed, and should not be committed to a
 * shared repository.
 *
 * @param args unused
 * @throws Exception if the link lacks a required parameter or the response JSON does not
 *     have the expected structure; I/O failures are caught and printed instead
 */
public static void main(String[] args) throws Exception {
    // Tmall item link; only its "id" and "skuId" query parameters are used.
    String url = "https://detail.tmall.com/item.htm?spm=a220m.1000858.1000725.13.334a2516Z44rJj&id=605891749329&skuId=4247606565698&areaId=110100&user_id=2397808261&cat_id=2&is_b=1&rn=796960b49886054f5adf63e3732263bf";
    String id = extractQueryParam(url, "id");
    String skuId = extractQueryParam(url, "skuId");

    // Build the link that the price is fetched from.
    String priceUrl = "https://mdskip.taobao.com/core/initItemDetail.htm?itemId=" + id;

    HttpGet httpGet = new HttpGet(priceUrl);
    // Headers are set because of Tmall's anti-crawling mechanism.
    httpGet.addHeader("Referer", "https://mdskip.taobao.com//core/initItemDetail.htm/_____tmd_____/punish?x5secdata=5e0c8e1365474455070961b803bd560607b52cabf5960afff39b64ce58073f78005654c1c031882a4c6dbedc85c51a441e3b919afa3298cff90c7626668fde860480def1935cf7544236ad19f2057552faa04c5a4741d78a3444916b235ae29cba45bd36bb8e49de97f26cfdecdbf948396052f1caa3b074546afe1c63fda94f00013ede75fd2a9d8eb3665574184336b45fc8a83fb7899cb8ec1e17b434d60fc4f66162bb2f483ccf2b55d158c298559fdc7b6ce8d2a594959dc501c6600df14872d54e92099cf7195680d2ba3b88511f76a4dbb2b594f8c93b60b948d1702fc695fdfb4765ce3b35f862ccc49a7ddbc070bd41eaf21a1d470b225d2dd40c0cafb3f59c461c51b8d9da168f0e68f989878b25517da9db5e2a0f7f0a1b8e6130c9c58bbe9ca0d4667afa0e550cc8ca351677f0472a23701cb860d0d41b647a37c8248933146442ba6ff7f958e4788f6268332ab21102bb58aa52e29810b4b19c0c1df8c88bfca12a767e0118f8a56142989ab47c351d91b6c92135d43282b79e7761d10039ab8ebcc28bec399e580b54b24749603dfa93a95d2702cbc2acfc327f5f8a7739d78cacb54f2d194ad3c969af6c5a446d8cd63d7eaa38b14d6c7a048ddf8ade6b164b9479e80cc95859b3f7&x5step=2");
    httpGet.addHeader("Sec-Fetch-Mode", "no-cors");
    // This explicit header is the User-Agent actually sent, so no client-level default is configured.
    httpGet.addHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.120 Safari/537.36");
    httpGet.addHeader("cookie", "t=6a61f92ccbd970e84526ac1f5f16b5ca; UM_distinctid=16dc4a60e3b604-066db812675b97-386a410b-100200-16dc4a60e3c2c6; miid=248548291090120761; tk_trace=oTRxOWSBNwn9dPyorMJE%2FoPdY8zfvmw%2Fq5hmCUfRWfRinUXBTBL%2BCO%2FNzRpSKWNvEzD5AocUKglf3lGWFQAqmEdNHvK6DuuBIN20rHgsBPCaU9DrSMoukYv0XWHaEJsmT0d7YdHvMiCLs6Jt7vIT%2BqnJvRzOAaMVzZJjOKEECr5uohFVYvvaBN7MUJjRsGU3g8mhPnv%2BHhlfUAnfGnZeMe2yT2M%2BWxPlTYMMlH0gytXsLZb%2BkDuGefoQG7caVJjdMuCN2Voc67K%2FWCfrEG25oRrsog%3D%3D; cookie2=120e0680b568ad50f0c72733499e7956; _tb_token_=fed3d4e460706; v=0; cna=8tEpFixmOiICAW64zk6EFHTi; unb=2044069096; uc1=existShop=false&pas=0&tag=8&lng=zh_CN&cookie14=UoTbldM7nmU%2Fvw%3D%3D&cookie16=VT5L2FSpNgq6fDudInPRgavC%2BQ%3D%3D&cookie15=Vq8l%2BKCLz3%2F65A%3D%3D&cookie21=URm48syIZJwTkNGk7euL6g%3D%3D; uc3=id2=UUjViSNYfbKLAw%3D%3D&lg2=Vq8l%2BKCLz3%2F65A%3D%3D&vt3=F8dBxdgpCfFJT%2BAlFmA%3D&nk2=D85B8wMn1%2B%2BGl1pynPVn; csg=b2beedab; lgc=lmd%5Cu5F85%5Cu4F60%5Cu957F%5Cu53D1%5Cu53CA%5Cu8170; cookie17=UUjViSNYfbKLAw%3D%3D; dnk=lmd%5Cu5F85%5Cu4F60%5Cu957F%5Cu53D1%5Cu53CA%5Cu8170; skt=ed8770349f9c6948; existShop=MTU3ODE4ODE2NQ%3D%3D; uc4=nk4=0%40De3W3b9EisQm60ydmctSvpRH%2FFEV6sw56RY%3D&id4=0%40U2o3vUzmEscWShqVUst018sE7Tjc; tracknick=lmd%5Cu5F85%5Cu4F60%5Cu957F%5Cu53D1%5Cu53CA%5Cu8170; _cc_=U%2BGCWk%2F7og%3D%3D; tg=0; _l_g_=Ug%3D%3D; sg=%E8%85%B063; _nk_=lmd%5Cu5F85%5Cu4F60%5Cu957F%5Cu53D1%5Cu53CA%5Cu8170; cookie1=AnCBIq9bOXVNWWlLCosHrWeRXuCnIYtL8x9Jt2vZR%2Bo%3D; enc=zoiogTkjkBtYr7w9dRq32fT7A2QRUm%2BmihpGKejzoHJ5V7bpTmXohUbHW0hPAqmdvvJ5kjRvIiN3BOvq8xuTmg%3D%3D; isg=BLy8y4Bjzl7KS_oCe0icPF42jVquHWDX1pn1I5Y9w6eKYVzrvsFqbzPQRcm8LJg3; l=dBxkNf0qQLHcPYBTBOCaourza77TIIRYSuPzaNbMi_5Qg6Ts_-bOoxN1tF96cjWf9lTB45113tv9-etk2UMqWXSpXUJ6nxDc.; ucn=center; x5sec=7b226d616c6c64657461696c736b69703b32223a226665386164646134356164313163333737633735623463313535353734383134434a793078664146454f37653662762b322f436548786f4d4d6a41304e4441324f5441354e6a7331227d");

    // try-with-resources closes the response and the client on every path,
    // including a null entity, an I/O failure, or a JSON parsing error.
    try (CloseableHttpClient httpClient = HttpClients.createDefault();
         CloseableHttpResponse response = httpClient.execute(httpGet)) {
        HttpEntity entity = response.getEntity();
        if (entity == null) {
            return;
        }
        // UTF-8 is only the fallback used when the response declares no charset.
        String result = EntityUtils.toString(entity, java.nio.charset.StandardCharsets.UTF_8);

        // Walk down to the promotion list of the requested SKU.
        JSONObject priceInfo = new JSONObject(result)
                .getJSONObject("defaultModel")
                .getJSONObject("itemPriceResultDO")
                .getJSONObject("priceInfo");
        JSONArray promotionList = priceInfo.getJSONObject(skuId).getJSONArray("promotionList");

        // The price is printed only when there is exactly one promotion entry;
        // nothing is printed for an empty list or for several entries.
        if (promotionList.length() == 1) {
            System.out.println("价格====" + promotionList.getJSONObject(0).get("price"));
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}

/**
 * Returns the raw (not URL-decoded) value of a query parameter in {@code url}.
 *
 * <p>Parameter names are matched in full, so looking up {@code id} does not match
 * {@code user_id} or {@code cat_id}, and the parameter may appear first or last.
 *
 * @param url  the link to inspect
 * @param name the exact parameter name
 * @return the text after {@code name=} up to the next {@code &}, {@code #} or end of string
 * @throws IllegalArgumentException if {@code url} has no such query parameter
 */
private static String extractQueryParam(String url, String name) {
    int queryStart = url.indexOf('?');
    String query = queryStart < 0 ? "" : url.substring(queryStart + 1);
    int fragmentStart = query.indexOf('#');
    if (fragmentStart >= 0) {
        query = query.substring(0, fragmentStart);
    }
    String prefix = name + "=";
    for (String pair : query.split("&")) {
        if (pair.startsWith(prefix)) {
            return pair.substring(prefix.length());
        }
    }
    throw new IllegalArgumentException("URL has no '" + name + "' query parameter: " + url);
}

运行结果

在这里插入图片描述

你可能感兴趣的:(爬取,java)