使用HttpClient爬取网页数据进行统计(java代码)

1.项目环境
IDE:IntelliJ IDEA 2017.1.4
编程语言:java
管理工具:maven
需要导包:httpclient、jsoup(解析html)等;解析json还需要导入JSON库——下文代码中的JSON.parseObject、JSONObject、JSONArray来自fastjson(也可以换用gson)。
我把重要的依赖贴出来(默认大家都是使用过maven的,如果不使用maven,你可以到网上下载jar包添加到项目里):


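<dependency>
    <groupId>org.apache.httpcomponents</groupId>
    <artifactId>httpclient</artifactId>
    <version>4.5.2</version>
</dependency>
<dependency>
    <groupId>org.jsoup</groupId>
    <artifactId>jsoup</artifactId>
    <version>1.9.2</version>
</dependency>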

2.新建一个Demo类:先通过doGet()获取接口返回的JSON串,再调用strWritedToJSONObject()方法进行转换,提取自己想要的数据。

/**
 * Fetches the job-search URL below with {@code doGet} (no extra query
 * parameters) and hands the response body to {@code strWritedToJSONObject},
 * which prints the statistics.
 */
@Test
public void test1() {
    final String searchUrl = "https://fe-api.zhaopin.com/c/i/sou?pageSize=90&cityId=530&workExperience=-1&education=-1&companyType=-1&employmentType=-1&jobWelfareTag=-1&kw=java&kt=3&_v=0.23892291&x-zp-page-request-id=33cab98577a5491e9aa746397e20f6bf-1552629353210-894761";
    // The query string is already part of the URL, so no parameter map is passed.
    final String responseBody = doGet(searchUrl, null);
    strWritedToJSONObject(responseBody);
}
private static void strWritedToJSONObject(String myJsonObj) {
            //将json字符串转换成jsonObject对象
            JSONObject jsonobj = JSON.parseObject(myJsonObj); 
            //得到 data对应JSON对象格式的总信息
            JSONObject data = jsonobj.getJSONObject("data");
            //得到 results 对应JSON数组格式的公司信息
            JSONArray results = data.getJSONArray("results");
            System.out.println("查出信息总条数为"+results.size());
            int f = 0;
            for (int i = 0; i 3){
                    f++;
                }
            }
            System.out.println("福利大于3条的公司有"+f+"家");
}
/**
 * Sends an HTTP GET request and returns the response body as text.
 *
 * @param url   target URL; may already contain a query string
 * @param param extra query parameters appended to {@code url}; may be {@code null}
 * @return the response body decoded as UTF-8 when the status code is 200;
 *         an empty string for any other status or when an exception occurs
 */
public static String doGet(String url, Map<String, String> param) {
    String resultString = "";
    // try-with-resources closes the client (and, below, the response) on every
    // path, including failures while building the URI or executing the request.
    try (CloseableHttpClient httpclient = HttpClients.createDefault()) {
        // Build the URI, appending any extra query parameters.
        URIBuilder builder = new URIBuilder(url);
        if (param != null) {
            for (Map.Entry<String, String> entry : param.entrySet()) {
                builder.addParameter(entry.getKey(), entry.getValue());
            }
        }
        URI uri = builder.build();

        // Create and execute the GET request.
        HttpGet httpGet = new HttpGet(uri);
        try (CloseableHttpResponse response = httpclient.execute(httpGet)) {
            // Only a 200 response is read; other statuses leave the result empty.
            if (response.getStatusLine().getStatusCode() == 200) {
                resultString = EntityUtils.toString(response.getEntity(), "UTF-8");
            }
        }
    } catch (Exception e) {
        // Best-effort helper: report the failure and fall through to return
        // whatever was read so far (normally the empty string).
        e.printStackTrace();
    }
    return resultString;
}

还有一种转换的方法是利用Map集合

String s = doGet("https://fe-api.zhaopin.com/c/i/sou?pageSize=90&cityId=489&workExperience=-1&education=-1&companyType=-1&employmentType=-1&jobWelfareTag=-1&kw=java&kt=3&rt=b5d064a102f2460c85b60b71c754b89c&_v=0.57815329&x-zp-page-request-id=6d0c9430a00144049f9b084ed003c8a1-1552698577400-459589",null);
            Map map = JSONObject.parseObject(s, Map.class);
            Map map1 = (Map) map.get("data");
            String s1 = map1.get("results").toString();
            List maps = JSONArray.parseArray(s1, Map.class);
            for (Map map2 : maps) {
                Map map3 = (Map) map2.get("company");
                System.out.println(map3.get("name") + " --- " + map2.get("jobName") + "--" + map2.get("salary"));
            }
}

你可能感兴趣的:(调用接口)