网页数据抓取之大众点评数据

package com.atman.baiye.store.utils;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import com.atman.baiye.store.domain.AiCommonInfo;

/**
 * String-scanning helpers that pull shop listing fields out of a dianping.com
 * search results page and turn them into {@link AiCommonInfo} objects.
 *
 * <p>Visible entry points:
 * <ul>
 *   <li>{@code getLocationCity} - extracts the city label and stores it in the
 *       static field {@code location_city}.</li>
 *   <li>{@code getItems} - cuts the page text into at most 10 item fragments,
 *       using the literal "附近" as the separator.</li>
 *   <li>{@code getElementVal} - fills a map for one fragment; the keys visibly
 *       written are "store_name", "title", "pic_url", "detail_url", "price"
 *       and "seserev_price" (always "0").</li>
 *   <li>{@code getGoodsInfoList} - runs {@code getItems} and
 *       {@code getElementVal} and copies the map values, the keyword and
 *       {@code location_city} into one {@code AiCommonInfo} per fragment.</li>
 *   <li>{@code main} - fetches a page through
 *       {@code WebHttpClient.getBebContentByURL} for the keyword "火锅" and
 *       runs the extraction steps on it.</li>
 * </ul>
 *
 * <p>NOTE(review): several string literals in this copy are empty or contain
 * only line breaks, and some statements of {@code getElementVal} appear to be
 * missing (for example {@code pic_urlstr} is used without a visible
 * declaration). Presumably the HTML delimiters and generic type arguments were
 * lost when the text was extracted - restore them from the original source
 * before relying on this class.
 *
 * <p>NOTE(review): {@code getGoodsInfoList} calls {@code setType(1002)} and
 * later {@code setType(1006)} on the same object, so only the second value
 * remains; it also reads the map key "store_url", for which no {@code put} is
 * visible in this copy - confirm both against the original.
 *
 * <p>Author contact: [email protected] (address obscured in this copy)
 * <p>Created: 2017-01-18 16:49:56
 */
public class GoodsDianPingUtils {
    
    /** Base search URL; {@code main} passes it to {@code WebHttpClient.getBebContentByURL} together with the keyword. */
    public static final String DIANPING_URL = "http://www.dianping.com/search/keyword/1/0_"; 
    /**
     * City label most recently extracted by {@link #getLocationCity(String)};
     * read by {@code getGoodsInfoList} when it fills each result's location.
     * Mutable shared static state.
     */
    public static String location_city = "";
    
    /**
     * Extracts the city label from the page text, stores it in
     * {@code location_city} and returns it.
     *
     * <p>The page is first narrowed to the span between the first occurrence of
     * {@code city J-city">} and the last occurrence of
     * {@code city-list J-city-list Hide}; the label is then taken from just
     * after {@code J-city">} inside that span. Both intermediate values are
     * printed to standard output.
     *
     * @param data page text to scan
     * @return the extracted city label (also assigned to {@code location_city})
     */
    public static String getLocationCity(String data){
        int beginIndex = data.indexOf("city J-city\">");
        int endIndex = data.lastIndexOf("city-list J-city-list Hide");
        // No check for a missing marker: indexOf/lastIndexOf return -1 in that case.
        String str = data.substring(beginIndex, endIndex);
        System.out.println("str=="+str);
        // NOTE(review): the end delimiter given to str.indexOf is an empty string in this
        // copy. String.indexOf("") returns 0, so the end index passed to substring is 0.
        // Presumably the real delimiter (the markup that follows the city name) was lost -
        // confirm against the original source.
        location_city = str.substring(str.indexOf("J-city\">")+"J-city\">".length(), str.indexOf(""));
        System.out.println("location_city:"+location_city);
        return location_city;
    }
    
    /**
     * Splits the page text into listing fragments.
     *
     * <p>Takes the part of {@code data} between a start marker and the last
     * occurrence of "附近", splits it on "附近", and collects the pieces in
     * order, re-appending "附近" to each one. Collection stops after 10
     * fragments. Every fragment is printed to standard output.
     *
     * <p>NOTE(review): the start-marker literal contains only a line break in
     * this copy; the original marker text is presumably lost - confirm. The raw
     * {@code List} types here and in the methods below look like generic type
     * arguments were stripped as well.
     *
     * @param data page text to split
     * @return list of at most 10 fragment strings
     */
    public static List getItems(String data){
        int startIndex = data.indexOf("
"); int endIndex = data.lastIndexOf("附近"); String dataStr = data.substring(startIndex, endIndex); List list = new ArrayList(); String arrayitem[] = dataStr.split("附近"); for (int i = 0; i < arrayitem.length; i++) { if(i == 10){ break ; } String item = arrayitem[i]+"附近"; System.out.println("item:"+item); list.add(item); } return list; } public static Map getElementVal(String item){ Map map = new HashMap(); int beginIndex = 0; int endIndex = 1; String store_name = ""; if(item.contains("

") && item.contains("

")){ beginIndex = item.indexOf("

"); endIndex = item.indexOf("

"); store_name = item.substring(beginIndex+"

".length(), endIndex); map.put("store_name", store_name); map.put("title", store_name); } System.out.println("store_name:"+store_name); String store_url = "http://www.dianping.com/"; String storeurlStr = item.substring(0, item.indexOf("#comment")); beginIndex = storeurlStr.lastIndexOf("")); String pic_url = pic_urlstr.substring(0, pic_urlstr.indexOf("\"/>")); map.put("pic_url", pic_url); map.put("detail_url", pic_url); System.out.println("pic_url:"+pic_url); //price String pricestr = item.substring(item.indexOf("¥")+"¥".length(), item.indexOf("
")); String price = pricestr.substring(0, pricestr.indexOf("")); map.put("price", price); System.out.println("price:"+price); map.put("seserev_price", "0"); return map; } public static List getGoodsInfoList(String jsonInfo, String keyword) { List aiCommonInfoList = new ArrayList(); List datalist = getItems(jsonInfo); for (String dataitem : datalist) { Map map = getElementVal(dataitem); AiCommonInfo aiCommonInfo = new AiCommonInfo(); aiCommonInfo.setType(1002); aiCommonInfo.setTitle((String)map.get("title")); aiCommonInfo.setPicUrl(map.get("pic_url")); aiCommonInfo.setDetailUrl(map.get("detail_url")); aiCommonInfo.setKeyword(keyword); aiCommonInfo.setType(1006); aiCommonInfo.setSource(3); String price = (String)map.get("price"); aiCommonInfo.setPrice(Double.parseDouble(price)); String reserve_price = map.get("seserev_price"); aiCommonInfo.setReservePrice(Double.parseDouble(reserve_price)); aiCommonInfo.setStoreName(map.get("store_name")); aiCommonInfo.setStoreUrl(map.get("store_url")); aiCommonInfo.setLocation(location_city); aiCommonInfoList.add(aiCommonInfo); } return aiCommonInfoList; } public static void main(String[] args) { String data = WebHttpClient.getBebContentByURL(DIANPING_URL,"火锅", true, ""); getLocationCity(data); List datalist = getItems(data); for (String item : datalist) { getElementVal(item); } } }


你可能感兴趣的:(java,notes,web网页数据抓取)