java爬虫-基于jsoup的简单爬虫实现(从智联获取工作信息)

先看效果,好的话就点个赞。jar 包在上一篇爬虫笔记中有链接。

java爬虫-基于jsoup的简单爬虫实现(从智联获取工作信息)_第1张图片

先来一个热热身:

package com.demo.test;
import java.io.IOException;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
 * Fetches job listings from the Zhilian Zhaopin (zhaopin.com) search pages with jsoup.
 *
 * @url      base search URL of the Zhilian site (changing it is not recommended)
 * @city     city to search jobs in
 * @keywords keyword(s) to search jobs by
 *
 * Paginates through the results and saves them to a txt file.
 * NOTE(review): @url/@city/@keywords are not standard Javadoc tags; they describe
 * the fields below, not method parameters.
 */

public class t3 {
    private String url="http://sou.zhaopin.com/jobs/searchresult.ashx?jl=";  // Zhilian search endpoint; the city is appended to the "jl=" query parameter
    private  String city="西安"; // default city to search in (Xi'an)
    private  String keywords="java";  // default search keyword
    // Overrides the default city and keyword.
    public t3(String city,String keywords){
        this.city=city;
        this.keywords =keywords;
    }

    /**
     * Walks the first 10 result pages and pulls the job columns out of the
     * results table (id "newlist_list_content_table") by CSS class name:
     * zwmc = job title, gsmc = company name, zwyx = monthly salary,
     * gzdd = work location, gxsj = posting date (the same order as the table
     * headers built in class t4 below).
     * NOTE(review): the city value is concatenated into the URL without
     * URL-encoding — confirm jsoup sends the non-ASCII query correctly,
     * otherwise URLEncoder.encode is needed.
     */
    public void getZhiLianWork(){
        try {
            for (int i=0;i<10;i++) {
                System.out.println("*********开始遍历第"+(i+1)+"页的求职信息*********");
                // Builds e.g. ...searchresult.ashx?jl=西安&kw=java&p=1&isadv=0 (pages are 1-based here).
                Document doc = Jsoup.connect(url+city+"&kw="+keywords+"&p="+(i+1)+"&isadv=0").get();
                Element content = doc.getElementById("newlist_list_content_table");
                // Parallel column lists: index j of each Elements refers to the same job row.
                Elements zwmcEls = content.getElementsByClass("zwmc");
                Elements gsmcEls = content.getElementsByClass("gsmc");
                Elements zwyxEls = content.getElementsByClass("zwyx");
                Elements gzddEls = content.getElementsByClass("gzdd");
                Elements gxsjEls = content.getElementsByClass("gxsj");
                for(int j = 0;j
                // NOTE(review): the source is truncated here by the article
                // extraction — the inner loop body, the catch clause and the
                // closing braces of the method and class are missing.

优化输出,更新源代码,在html表格输出

package com.demo.test;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
 * Variant of t3 that writes the scraped Zhilian job listings into an HTML
 * table instead of plain text. It parses a local template (output.html)
 * containing an empty table with id "workinfo", fills in a header row plus
 * one row per job, and writes the generated page to out.html.
 */
public class t4{

    public static void main(String[] args) {
        try {
            String url ="http://sou.zhaopin.com/jobs/searchresult.ashx?";
            String city ="西安";
            String keywords = "java";
            // Writer for the generated result page, explicitly UTF-8.
            // NOTE(review): writes to out.html while the template below is read
            // from output.html — two distinct files; the write-back of the filled
            // template is in the truncated part of the source.
            BufferedWriter bWriter = new BufferedWriter(
                    new OutputStreamWriter(
                            new FileOutputStream("F:\\pp\\out.html"),"utf-8"));
            bWriter.write("");


            // Parse the local template and locate the empty results table.
            File input = new File("F:\\pp\\output.html");
            Document doc2 = Jsoup.parse(input, "UTF-8", "");
            Element table = doc2.getElementById("workinfo");
            table.text("");
            // Header row: index, job title, company name, monthly salary,
            // work location, posting date.
            Element theader = table.appendElement("tr");
            theader.appendElement("th").text("序号");
            theader.appendElement("th").text("职位名称");
            theader.appendElement("th").text("公司名称");
            theader.appendElement("th").text("职位月薪");
            theader.appendElement("th").text("工作地点");
            theader.appendElement("th").text("发布日期");


            for(int page=0;page<10;page++){
                // NOTE(review): unlike t3, the base URL here ends at "?" and the
                // city is appended without the "jl=" parameter name, yielding
                // "...?西安&kw=..." — likely a bug. Also the page counter starts
                // at 0 while t3 uses 1-based pages; confirm against the site.
                Document doc = Jsoup.connect(url+city+"&kw="+keywords+"&p="+page).get();
                Element content = doc.getElementById("newlist_list_content_table");
                // Parallel column lists, same CSS classes as in t3.
                Elements zwmcEls = content.getElementsByClass("zwmc");
                Elements gsmcEls = content.getElementsByClass("gsmc");
                Elements zwyxEls = content.getElementsByClass("zwyx");
                Elements gzddEls = content.getElementsByClass("gzdd");
                Elements gxsjEls = content.getElementsByClass("gxsj");

                for(int i = 1;i
                // NOTE(review): the source is truncated here by the article
                // extraction — the row-building loop body, the write/flush/close
                // of bWriter, the catch clause and the closing braces are missing.

output.html模板(注:原文模板的 HTML 标签在抓取时被剥离,以下为按代码要求——
`getElementById("workinfo")`、标题与页脚文字——重建的最小模板):

<!DOCTYPE html>
<html>
 <head>
  <meta charset="utf-8">
  <title>智联工作信息</title>
 </head>
 <body>
  <h2>智联工作信息</h2>
  <table id="workinfo" border="1" cellspacing="0"></table>
  <p>版权所有 翻版必究@2017 sysker</p>
 </body>
</html>

你可能感兴趣的:(其他学习)