网页爬虫

/*

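    Web crawler: fetches a page over a URL connection and prints every
    substring of each line that matches a regular expression.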

*/



import java.io.*;

import java.net.*;

import java.util.regex.*;



/**
 * Minimal web-scraping demo: reads a page line by line over a
 * {@link URLConnection} and prints every substring that matches a regular
 * expression.
 */
class RegexTest2
{
    /**
     * Program entry point; simply delegates to {@link #getMails()}.
     *
     * @param args command-line arguments (not used)
     * @throws Exception whatever {@link #getMails()} throws
     */
    public static void main(String[] args) throws Exception
    {
        getMails();
    }

    /**
     * Opens the configured URL, reads the response one line at a time and
     * prints each regex match on its own line to standard output.
     *
     * <p>Both string literals below are placeholder text that must be replaced
     * with a real URL and a real regular expression before this does anything
     * useful. As written, the URL literal has no protocol, so constructing the
     * {@link URL} fails with {@link MalformedURLException}.
     *
     * @throws MalformedURLException if the URL literal is not a valid URL
     * @throws PatternSyntaxException if the regex literal cannot be compiled
     * @throws IOException if connecting to or reading from the URL fails
     * @throws Exception declared for backward compatibility with existing callers
     */
    public static void getMails() throws Exception
    {
        // Placeholder: replace with the address of the page to scan.
        URL url = new URL("要抓取的内容");

        // Placeholder: replace with the expression to search for.
        String mailReg = "正则表达式";

        // Compile once, and before touching the network, so that a bad
        // pattern fails without leaving a connection open.
        Pattern p = Pattern.compile(mailReg);

        URLConnection conn = url.openConnection();

        // try-with-resources guarantees the stream (and the underlying
        // connection's socket) is released even if reading throws midway.
        // NOTE(review): InputStreamReader without an explicit charset decodes
        // with the JVM default charset; pass the page's real charset if
        // non-ASCII matches matter.
        try (InputStream in = conn.getInputStream();
             BufferedReader bufr = new BufferedReader(new InputStreamReader(in)))
        {
            String line;
            while ((line = bufr.readLine()) != null) {
                Matcher m = p.matcher(line);
                // A single line may contain several matches; print them all.
                while (m.find()) {
                    System.out.println(m.group());
                }
            }
        }
    }
}

你可能感兴趣的:(爬虫)