实现:
package com.sichan.one;
import java.io.IOException;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;
import com.sichan.bean.Link;
import com.sichan.util.DBUtil;
import com.sichan.util.JsoupUtil;
/**
 * Downloads a single job-posting page and copies its fields into a {@link Link} bean.
 *
 * <p>Text is pulled out of the page with CSS selectors, trimmed by a {@link SubLink}
 * instance, and the populated bean is printed to standard output. The database insert
 * step exists only as commented-out code.
 */
public class GetLink {
    // Obtained when the instance is created; only the disabled persistence step refers to them.
    private JsoupUtil ju = JsoupUtil.getInstance();
    private DBUtil du = DBUtil.getInstance();
    // A single bean instance is reused and overwritten on every getLink call.
    private Link link = new Link();
    private String insertSql = "";

    /**
     * Fetches {@code url}, extracts the posting fields and prints the resulting bean.
     *
     * <p>An {@link IOException} raised while fetching the page is reported with
     * {@code printStackTrace()} and not rethrown.
     *
     * @param url address of the job-posting page to fetch
     */
    public void getLink(String url) {
        try {
            // Every selector below is evaluated against the <body> element(s) of the page.
            Elements body = Jsoup.connect(url).get().select("body");

            Elements title = body.select(".top-fixed-box h1");
            Elements company = body.select(".top-fixed-box h2");
            Elements salary = body.select("li:matches(职位月薪)");
            Elements location = body.select("li:matches(工作地点)");
            Elements published = body.select("li:matches(发布日期)");
            Elements jobNature = body.select("li:matches(工作性质)");
            Elements experience = body.select("li:matches(工作经验)");
            Elements education = body.select("li:matches(最低学历)");
            Elements headcount = body.select("li:matches(招聘人数)");
            Elements category = body.select("li:matches(职位类别)");
            Elements scale = body.select("li:matches(公司规模)");
            Elements companyNature = body.select("li:matches(公司性质)");
            Elements industry = body.select("li:matches(公司行业)");

            SubLink parser = new SubLink();

            link.setNum(GetHtml.num);
            link.setPost(title.text());
            link.setCompany_name(company.text());

            // Each parser call stores its result in a field of the SubLink, read right after.
            parser.subSalary(salary.text());
            link.setLow_salary(parser.low_salary);
            link.setHigh_salary(parser.high_salary);

            parser.subAddress(location.text());
            link.setAddress(parser.subAddress);

            parser.subRelease_time(published.text());
            link.setRelease_time(parser.subRelease_time);

            parser.subJob_nature(jobNature.text());
            link.setJob_nature(parser.subJob_nature);

            parser.subExperience(experience.text());
            link.setExperience(parser.subExperience);

            parser.subEdu_background(education.text());
            link.setEdu_background(parser.subEdu_background);

            parser.subRecruiting_num(headcount.text());
            link.setRecruiting_num(parser.subRecruiting_num);

            parser.subJob_category(category.text());
            link.setJob_category(parser.subJob_category);

            parser.subCompany_scale(scale.text());
            link.setCompany_scale(parser.subCompany_scale);

            parser.subCompany_nature(companyNature.text());
            link.setCompany_nature(parser.subCompany_nature);

            parser.subCompany_industry(industry.text());
            link.setCompany_industry(parser.subCompany_industry);

            System.out.println(link);

            // Persistence is currently disabled:
            // insertSql = ju.getInsertSql(link);
            // du.insert(insertSql);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
package com.sichan.one;
/**
 * Trims and converts the labelled text fragments scraped from a job-posting page.
 *
 * <p>Each {@code sub...} method takes the raw text of one field, skips the first
 * {@value #LABEL_LENGTH} characters (presumably the field label and its separator —
 * confirm against the scraped page) and stores the result in the matching
 * package-private field, which callers in the same package read directly.
 *
 * <p>Input that is {@code null}, too short, or not parseable never throws: string
 * fields are set to the empty string and numeric fields to {@code -1}.
 */
public class SubLink {
    /** Number of leading characters every parser skips. */
    private static final int LABEL_LENGTH = 5;
    /** Value stored in numeric fields when the input is missing or unparseable. */
    private static final int UNKNOWN = -1;

    double low_salary;
    double high_salary;
    String subAddress;
    String subRelease_time;
    String subJob_nature;
    String subExperience;
    String subEdu_background;
    Integer subRecruiting_num;
    String subJob_category;
    String subCompany_scale;
    String subCompany_nature;
    String subCompany_industry;

    /**
     * Converts the scraped monthly-salary text into numeric bounds.
     *
     * <ul>
     *   <li>Text containing "以": the characters between the label and the last six
     *       characters are parsed as one number used for both bounds.</li>
     *   <li>Otherwise, text containing "元": the characters between the label and the
     *       last four characters are split on "-" into low and high. A low bound that
     *       is not a multiple of two is lowered by one (e.g. 6001 becomes 6000).</li>
     *   <li>Anything else, or any text that cannot be sliced or parsed: both bounds
     *       are set to {@code -1}.</li>
     * </ul>
     *
     * @param salary raw salary text; may be {@code null} or empty
     */
    public void subSalary(String salary) {
        // Work on locals so a parse failure never leaves the two fields half-updated.
        double low = UNKNOWN;
        double high = UNKNOWN;
        if (salary != null) {
            try {
                if (salary.indexOf("以") != -1) {
                    String amount = slice(salary, LABEL_LENGTH, salary.length() - 6);
                    low = Double.valueOf(amount);
                    high = low;
                } else if (salary.indexOf("元") != -1) {
                    String[] bounds = slice(salary, LABEL_LENGTH, salary.length() - 4).split("-");
                    if (bounds.length >= 2) {
                        double parsedLow = Double.valueOf(bounds[0]);
                        double parsedHigh = Double.valueOf(bounds[1]);
                        low = (parsedLow % 2 == 0) ? parsedLow : parsedLow - 1;
                        high = parsedHigh;
                    }
                }
            } catch (NumberFormatException ignored) {
                // Not a number: report the salary as unknown instead of aborting the caller.
                low = UNKNOWN;
                high = UNKNOWN;
            }
        }
        low_salary = low;
        high_salary = high;
    }

    /**
     * Keeps at most two characters of the address that follow the label.
     *
     * @param address raw address text; may be {@code null} or empty
     */
    public void subAddress(String address) {
        if (address == null) {
            subAddress = "";
            return;
        }
        subAddress = slice(address, LABEL_LENGTH, Math.min(LABEL_LENGTH + 2, address.length()));
    }

    /**
     * Stores the release-time text without its label.
     *
     * @param release_time raw release-time text; may be {@code null} or empty
     */
    public void subRelease_time(String release_time) {
        subRelease_time = afterLabel(release_time);
    }

    /**
     * Stores the job-nature text without its label.
     *
     * @param job_nature raw job-nature text; may be {@code null} or empty
     */
    public void subJob_nature(String job_nature) {
        subJob_nature = afterLabel(job_nature);
    }

    /**
     * Stores the experience text without its label.
     *
     * @param experience raw experience text; may be {@code null} or empty
     */
    public void subExperience(String experience) {
        subExperience = afterLabel(experience);
    }

    /**
     * Stores the education-background text without its label.
     *
     * @param edu_background raw education text; may be {@code null} or empty
     */
    public void subEdu_background(String edu_background) {
        subEdu_background = afterLabel(edu_background);
    }

    /**
     * Converts the scraped head-count text into a number.
     *
     * <p>When the text after the label contains "人", its last character is dropped and
     * the remainder parsed as an integer; otherwise, or when parsing fails, the result
     * is {@code -1}.
     *
     * @param recruiting_num raw head-count text; may be {@code null} or empty
     */
    public void subRecruiting_num(String recruiting_num) {
        int count = UNKNOWN;
        String value = afterLabel(recruiting_num);
        if (value.indexOf("人") != -1) {
            try {
                // value is non-empty here, so dropping its last character is always in range.
                count = Integer.valueOf(value.substring(0, value.length() - 1));
            } catch (NumberFormatException ignored) {
                // Non-numeric head count: keep the unknown sentinel.
                count = UNKNOWN;
            }
        }
        subRecruiting_num = count;
    }

    /**
     * Stores the job-category text without its label; when the text contains
     * "您也许对", the last fifteen characters are removed as well.
     *
     * @param job_category raw job-category text; may be {@code null} or empty
     */
    public void subJob_category(String job_category) {
        if (job_category != null && job_category.indexOf("您也许对") != -1) {
            subJob_category = slice(job_category, LABEL_LENGTH, job_category.length() - 15);
        } else {
            subJob_category = afterLabel(job_category);
        }
    }

    /**
     * Stores the company-scale text without its label.
     *
     * @param company_scale raw company-scale text; may be {@code null} or empty
     */
    public void subCompany_scale(String company_scale) {
        subCompany_scale = afterLabel(company_scale);
    }

    /**
     * Stores the company-nature text without its label.
     *
     * @param company_nature raw company-nature text; may be {@code null} or empty
     */
    public void subCompany_nature(String company_nature) {
        subCompany_nature = afterLabel(company_nature);
    }

    /**
     * Stores the company-industry text without its label.
     *
     * @param company_industry raw company-industry text; may be {@code null} or empty
     */
    public void subCompany_industry(String company_industry) {
        subCompany_industry = afterLabel(company_industry);
    }

    /** Returns everything after the label, or the empty string when there is nothing there. */
    private static String afterLabel(String text) {
        return text == null ? "" : slice(text, LABEL_LENGTH, text.length());
    }

    /** Bounds-checked {@code substring}: yields the empty string instead of throwing. */
    private static String slice(String text, int begin, int end) {
        if (text == null || begin < 0 || end > text.length() || begin > end) {
            return "";
        }
        return text.substring(begin, end);
    }
}
package com.sichan.bean;
import java.io.Serializable;
/**
 * Serializable bean holding the fields extracted from one job posting.
 *
 * <p>Pure data holder: every property has a plain getter and setter, and
 * {@link #toString()} lists all properties in declaration order.
 */
public class Link implements Serializable {
    private static final long serialVersionUID = 1165098694307553167L;

    private int num;
    private String post;
    private String company_name;
    private double low_salary;
    private double high_salary;
    private String address;
    private String release_time;
    private String job_nature;
    private String experience;
    private String edu_background;
    private int recruiting_num;
    private String job_category;
    private String company_scale;
    private String company_nature;
    private String company_industry;

    /** @return the record number */
    public int getNum() {
        return this.num;
    }

    /** @param num the record number */
    public void setNum(int num) {
        this.num = num;
    }

    /** @return the post (job title) text */
    public String getPost() {
        return this.post;
    }

    /** @param post the post (job title) text */
    public void setPost(String post) {
        this.post = post;
    }

    /** @return the company name */
    public String getCompany_name() {
        return this.company_name;
    }

    /** @param company_name the company name */
    public void setCompany_name(String company_name) {
        this.company_name = company_name;
    }

    /** @return the lower salary bound */
    public double getLow_salary() {
        return this.low_salary;
    }

    /** @param low_salary the lower salary bound */
    public void setLow_salary(double low_salary) {
        this.low_salary = low_salary;
    }

    /** @return the upper salary bound */
    public double getHigh_salary() {
        return this.high_salary;
    }

    /** @param high_salary the upper salary bound */
    public void setHigh_salary(double high_salary) {
        this.high_salary = high_salary;
    }

    /** @return the address */
    public String getAddress() {
        return this.address;
    }

    /** @param address the address */
    public void setAddress(String address) {
        this.address = address;
    }

    /** @return the release time text */
    public String getRelease_time() {
        return this.release_time;
    }

    /** @param release_time the release time text */
    public void setRelease_time(String release_time) {
        this.release_time = release_time;
    }

    /** @return the job nature */
    public String getJob_nature() {
        return this.job_nature;
    }

    /** @param job_nature the job nature */
    public void setJob_nature(String job_nature) {
        this.job_nature = job_nature;
    }

    /** @return the experience text */
    public String getExperience() {
        return this.experience;
    }

    /** @param experience the experience text */
    public void setExperience(String experience) {
        this.experience = experience;
    }

    /** @return the education background */
    public String getEdu_background() {
        return this.edu_background;
    }

    /** @param edu_background the education background */
    public void setEdu_background(String edu_background) {
        this.edu_background = edu_background;
    }

    /** @return the recruiting head count */
    public int getRecruiting_num() {
        return this.recruiting_num;
    }

    /** @param recruiting_num the recruiting head count */
    public void setRecruiting_num(int recruiting_num) {
        this.recruiting_num = recruiting_num;
    }

    /** @return the job category */
    public String getJob_category() {
        return this.job_category;
    }

    /** @param job_category the job category */
    public void setJob_category(String job_category) {
        this.job_category = job_category;
    }

    /** @return the company scale */
    public String getCompany_scale() {
        return this.company_scale;
    }

    /** @param company_scale the company scale */
    public void setCompany_scale(String company_scale) {
        this.company_scale = company_scale;
    }

    /** @return the company nature */
    public String getCompany_nature() {
        return this.company_nature;
    }

    /** @param company_nature the company nature */
    public void setCompany_nature(String company_nature) {
        this.company_nature = company_nature;
    }

    /** @return the company industry */
    public String getCompany_industry() {
        return this.company_industry;
    }

    /** @param company_industry the company industry */
    public void setCompany_industry(String company_industry) {
        this.company_industry = company_industry;
    }

    /**
     * Renders every property as {@code name=value}, comma-separated, inside
     * {@code Link [...]}.
     */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("Link [");
        text.append("num=").append(num);
        text.append(", post=").append(post);
        text.append(", company_name=").append(company_name);
        text.append(", low_salary=").append(low_salary);
        text.append(", high_salary=").append(high_salary);
        text.append(", address=").append(address);
        text.append(", release_time=").append(release_time);
        text.append(", job_nature=").append(job_nature);
        text.append(", experience=").append(experience);
        text.append(", edu_background=").append(edu_background);
        text.append(", recruiting_num=").append(recruiting_num);
        text.append(", job_category=").append(job_category);
        text.append(", company_scale=").append(company_scale);
        text.append(", company_nature=").append(company_nature);
        text.append(", company_industry=").append(company_industry);
        text.append("]");
        return text.toString();
    }
}
输出结果: