Hadoop 系统入门+核心精讲

download:Hadoop 系统入门+核心精讲

package com.zzger.model;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.CountDownLatch;

import com.zzger.module.queue.UrlQueue;
import com.zzger.util.HttpUtils;
import com.zzger.util.RegexUtils;

public class WebSite {

/**
 * Root URL of the site. It is queued by the constructor and is the page
 * downloaded by guangDu().
 */
private String url;

/**
 * Queue of URLs that still have to be crawled.
 * Parameterized with String because the constructor offers a String URL to it.
 */
private UrlQueue<String> urls = new UrlQueue<>();

/**
 * URLs of pages that have already been crawled, wrapped with
 * Collections.synchronizedList.
 */
private List<String> exitUrls = Collections.synchronizedList(new ArrayList<>());

/** Initial count of mDoneSignal. */
private static final int TOTAL_THREADS = 12;

/** Start latch with an initial count of 1; worker threads count it down and then await it. */
private final CountDownLatch mStartSignal = new CountDownLatch(1);

/** Completion latch with an initial count of TOTAL_THREADS; workers count it down when they finish. */
private final CountDownLatch mDoneSignal = new CountDownLatch(TOTAL_THREADS);
   
/**
 * Creates a crawler for the given site and queues the site's home page as
 * the first URL to crawl.
 *
 * @param url the site's home page URL
 */
public WebSite(String url){
    // Seed the crawl queue with the home page, then remember it as the site URL.
    urls.offer(url);
    this.url = url;
}
   
/**
 * Starts crawling in the background: a new thread downloads the site's root
 * URL with HttpUtils.httpGet and hands the result to paxing(String).
 * This method returns as soon as the thread has been started.
 */
public void guangDu(){
    Runnable fetchHomePage = () -> paxing(HttpUtils.httpGet(url));
    new Thread(fetchHomePage).start();
}
   
/**
 * Processes the HTML of a fetched page. Returns immediately when the text
 * does not contain the "下一页" (next page) marker; otherwise it cuts out the
 * part of the page between two markers for further processing.
 *
 * NOTE(review): the string literals below are split across lines and the
 * RegexUtils.RegexString(...) call is never closed, so this block does not
 * compile as written. It looks as if markup inside the literals was lost
 * and the rest of this method was dropped together with the header of a
 * second method: dxcPx() is called further down and from main, but no
 * definition of it is visible here. Restore from the original source
 * before relying on anything below.
 *
 * @param html page content to process
 */
public void paxing(String html){
    if(html.lastIndexOf("下一页

")<0) return ;

    String strList = html.substring(html.indexOf("
  • "), html.lastIndexOf("下一页
  • "));

        String url = RegexUtils.RegexString(" list = page.ybhqSection().getSections();
        // One new thread is started per section in the list.
        for(Section section : list){
            new Thread(new Runnable() {
                @Override
                public void run() {
                    // Count the start latch down; its initial count is 1, so the first worker to get here opens it.
                    mStartSignal.countDown();
                    try {
                        // Blocks until mStartSignal reaches 0 before running the code below.
                        // NOTE(review): every worker counts down just above, so this await returns immediately.
                        mStartSignal.await();
                        // All worker threads wait on the same start signal.
                    } catch (InterruptedException e) {
                        e.printStackTrace();
                    }
                    DuanZi duanzi = section.select().getModel();
                    System.out.println(duanzi.getTitle());
                    // Count the completion latch down by one once this worker is done.
                    mDoneSignal.countDown();
                }
            }
            ).start();
        }
        try
        { 
            // Wait for the worker threads to finish.
            // NOTE(review): mDoneSignal starts at TOTAL_THREADS, so this only returns after that many
            // countDown() calls in total, however many sections were in the list - confirm the counts match.
            mDoneSignal.await();
        } 
        catch (InterruptedException e) 
        { 
            e.printStackTrace(); 
        } 
        // After the worker tasks have finished, call dxcPx() again
        // (presumably to take the next URL from the queue - its definition is not visible here).
        dxcPx();
    }
    /**
     * Entry point: creates a crawler for http://duanziwang.com, starts the
     * background fetch of its home page, then starts ten more threads that
     * each call dxcPx() on the same crawler.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        final WebSite web = new WebSite("http://duanziwang.com");
        web.guangDu();

        final int workerCount = 10;
        int started = 0;
        while (started < workerCount) {
            new Thread(() -> web.dxcPx()).start();
            started++;
        }
    }

    }

    你可能感兴趣的:(hadoop)