如何用 Java 实现网页抓取？

import java.io.*;
import java.net.*;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;

/**
 * Minimal page fetcher: downloads the text behind a URL and appends it to a local file.
 *
 * <p>{@link #test()} keeps the original fixed source and destination; the
 * {@link #test(String, File)} overload accepts any URL and output file.
 */
public class Catch1 {

    /** Page fetched by the no-argument {@link #test()}. */
    private static final String DEFAULT_URL = "http://www.sohu.com";

    /** Directory that receives the page fetched by the no-argument {@link #test()}. */
    private static final String DEFAULT_DIR = "E:\\text";

    /** File name, inside {@link #DEFAULT_DIR}, used by the no-argument {@link #test()}. */
    private static final String DEFAULT_FILE = "test.html";

    /** Prefix of the charset parameter inside a Content-Type header value. */
    private static final String CHARSET_PARAM = "charset=";

    /**
     * Fetches the default page and appends it to the default output file.
     *
     * <p>Failures are reported on standard error and never propagate to the caller. When the
     * download fails, the output file is left untouched.
     */
    public void test() {
        try {
            test(DEFAULT_URL, new File(DEFAULT_DIR, DEFAULT_FILE));
        } catch (IOException e) {
            // MalformedURLException is an IOException, so one handler covers both cases.
            e.printStackTrace();
        }
    }

    /**
     * Fetches {@code address} and appends its text to {@code target}.
     *
     * <p>The page is read line by line and every line is terminated with {@code '\n'}. The text
     * is decoded with the charset named in the response's Content-Type (UTF-8 when none is
     * given) and written back with that same charset. Missing parent directories of
     * {@code target} are created. If {@code target} already exists the page is appended to it.
     *
     * @param address URL to download; must not be {@code null}
     * @param target  file that receives the page; must not be {@code null}
     * @throws IOException if the URL is malformed, the page cannot be read, or the file
     *                     cannot be written
     */
    public void test(String address, File target) throws IOException {
        URLConnection conn = new URL(address).openConnection();
        Charset charset = charsetOf(conn);
        // Download completely before touching the file so a failed fetch writes nothing.
        String page = readAll(conn, charset);
        appendTo(target, page, charset);
    }

    /** Returns the charset named in the connection's Content-Type, or UTF-8 if none is usable. */
    private static Charset charsetOf(URLConnection conn) {
        String contentType = conn.getContentType();
        if (contentType != null) {
            for (String part : contentType.split(";")) {
                String param = part.trim();
                if (param.regionMatches(true, 0, CHARSET_PARAM, 0, CHARSET_PARAM.length())) {
                    String label = param.substring(CHARSET_PARAM.length()).replace("\"", "").trim();
                    try {
                        return Charset.forName(label);
                    } catch (IllegalArgumentException ignored) {
                        // Malformed or unsupported charset label: fall back to the default.
                    }
                }
            }
        }
        return StandardCharsets.UTF_8;
    }

    /** Reads the whole response body as text, ending every line with {@code '\n'}. */
    private static String readAll(URLConnection conn, Charset charset) throws IOException {
        StringBuilder document = new StringBuilder();
        try (BufferedReader reader =
                new BufferedReader(new InputStreamReader(conn.getInputStream(), charset))) {
            String line;
            while ((line = reader.readLine()) != null) {
                document.append(line).append('\n');
            }
        }
        return document.toString();
    }

    /** Appends {@code content} to {@code target}, creating the file and its directories if needed. */
    private static void appendTo(File target, String content, Charset charset) throws IOException {
        File parent = target.getParentFile();
        // Re-check isDirectory() after mkdirs() in case the directory appeared in the meantime.
        if (parent != null && !parent.isDirectory() && !parent.mkdirs() && !parent.isDirectory()) {
            throw new IOException("Cannot create directory " + parent);
        }
        try (Writer out = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(target, true), charset))) {
            out.write(content);
        }
    }

    /**
     * Command-line entry point: runs {@link #test()} once.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        Catch1 catch2 = new Catch1();
        catch2.test();
    }
}


你可能感兴趣的：（如何用 Java 实现网页抓取？）