最近北京联通推出了185号段,我一直没有办北京的号码,在联通网上营业厅选号时萌生了挑个靓号的想法,于是研究了一下网站的结构,
立马开始写程序获取手机号码,然后对手机号码筛选靓号(正则匹配、反向引用、零宽断言).
种子地址:
http://num.10010.com/NumApp/GoodsDetail/queryMoreNums?callback=jsonp_queryMoreNums&province=11&cityCode=110&rankMoney=76&q_p=${page}&net=01&preFeeSel=0&Show4GNum=TRUE&_=${random}
用HttpClient发起HTTP请求,返回值是用JSONP回调包裹的JSON(用jsel解析,个人觉得很犀利),手机号码存放在moreNumArray属性里,类型为数组.
获取数据,将数据分类筛选,筛选出AAAA,AAA,AABB,ABCD,ABC,DCBA,CBA类型的手机号码,然后保存到本地文件,就是这么简单,程序实现如下:
public class PhoneNumber { private static Set<String> NO4 = new TreeSet<String>(); private static Set<String> AAAA = new TreeSet<String>(); private static Set<String> AAA = new TreeSet<String>(); private static Set<String> AABB = new TreeSet<String>(); private static Set<String> ABCD = new TreeSet<String>(); private static Set<String> DCBA = new TreeSet<String>(); private static Set<String> ABC = new TreeSet<String>(); private static Set<String> CBA = new TreeSet<String>(); private static AtomicLong phoneNumberSize = new AtomicLong(0); public static void main(String[] args) throws IOException, URISyntaxException { String seed = "http://num.10010.com/NumApp/GoodsDetail/queryMoreNums?callback=jsonp_queryMoreNums&province=11&cityCode=110&rankMoney=76&q_p=${page}&net=01&preFeeSel=0&Show4GNum=TRUE&_=${random}"; BasicCookieStore cookieStore = new BasicCookieStore(); CloseableHttpClient httpClient = HttpClients.custom().setDefaultCookieStore(cookieStore).build(); try { for (int i = 0; i < 100; i++) { HttpGet httpget = new HttpGet(seed.replace("${page}", new Integer(1).toString()).replace("${random}", String.valueOf(new Date().getTime()))); request(httpClient, httpget); } print(); report(); } finally { httpClient.close(); } } private static void report() throws IOException { writer("AAAA",AAAA); writer("AAA",AAA); writer("AABB",AABB); writer("ABCD",ABCD); writer("ABC",ABC); writer("DCBA",DCBA); writer("CBA",CBA); } private static void writer(String name, Set set) throws IOException { File file = new File("./${name}-${data}.phone".replace("${name}", name).replace("${data}", new SimpleDateFormat("yyyy-MM-dd-HH-mm-ss").format(new Date()))); file.createNewFile(); PrintWriter writer = new PrintWriter(file); writer.println("report:"); writer.println("================================="); writer.println("size : ".concat(String.valueOf(set.size()))); for (Iterator iterator = set.iterator(); iterator.hasNext(); ) { writer.println(iterator.next()); } writer.close(); } private static 
CloseableHttpResponse request(CloseableHttpClient httpClient, HttpGet httpget) throws IOException { CloseableHttpResponse response = httpClient.execute(httpget); try { HttpEntity entity = response.getEntity(); // String text = getText(entity.getContent(), Charset.defaultCharset()); String json = text.replaceAll("jsonp_queryMoreNums\\((.*)\\);", "$1"); Map<String, Object> decode = JSONDecoder.decode(json); List moreNumArray = (List) decode.get("moreNumArray"); int size = moreNumArray.size() / 7; phoneNumberSize.addAndGet(size); for (int i = 0; i < size; i++) { String phoneNo = moreNumArray.get(i * 7).toString(); if (/*AAAA*/phoneNo.matches("\\d*(\\d)\\1{3,}\\d*")) { AAAA.add(phoneNo); } else if (/*AAA*/phoneNo.matches("\\d*(\\d)\\1{2,}\\d*")) { AAA.add(phoneNo); } else if (/*AABB*/phoneNo.matches("\\d*(\\d)\\1(\\d)\\2\\d*")) { AABB.add(phoneNo); } else if (/*ABCD*/phoneNo.matches("\\d*(?:(?:0(?=1)|1(?=2)|2(?=3)|3(?=4)|4(?=5)|5(?=6)|6(?=7)|7(?=8)|8(?=9)){3,})\\d*")) { ABCD.add(phoneNo); } else if (/*DCBA*/phoneNo.matches("\\d*(?:9(?=8)|8(?=7)|7(?=6)|6(?=5)|5(?=4)|4(?=3)|3(?=2)|2(?=1)|1(?=0)){3,}\\d*")) { DCBA.add(phoneNo); } else if (/*ABC*/phoneNo.matches("\\d*(?:(?:0(?=1)|1(?=2)|2(?=3)|3(?=4)|4(?=5)|5(?=6)|6(?=7)|7(?=8)|8(?=9)){2,})\\d*")) { ABC.add(phoneNo); } else if (/*CBA*/phoneNo.matches("\\d*(?:9(?=8)|8(?=7)|7(?=6)|6(?=5)|5(?=4)|4(?=3)|3(?=2)|2(?=1)|1(?=0)){2,}\\d*")) { CBA.add(phoneNo); } else if (!phoneNo.matches("\\d*4\\d*")/*NO4*/) { NO4.add(phoneNo); } } } finally { response.close(); } return response; } private static void print() { System.out.println("report:"); System.out.println("================================="); System.out.println("size : ".concat(phoneNumberSize.toString())); System.out.println("\tAAAA : ".concat(String.valueOf(AAAA.size()))); System.out.println("\tAAA : ".concat(String.valueOf(AAA.size()))); System.out.println("\tAABB : ".concat(String.valueOf(AABB.size()))); System.out.println("\tABCD : ".concat(String.valueOf(ABCD.size()))); 
System.out.println("\tABC : ".concat(String.valueOf(ABC.size()))); System.out.println("\tDCBA : ".concat(String.valueOf(DCBA.size()))); System.out.println("\tCBA : ".concat(String.valueOf(CBA.size()))); } public final static String getText(InputStream inputStream, Charset charset) throws IOException { StringBuilder text = new StringBuilder(); try { BufferedReader read = new BufferedReader(new InputStreamReader(inputStream, charset.name())); String line = null; while ((line = read.readLine()) != null) { text.append(line); } } finally { if (inputStream != null) { inputStream.close(); } } return text.toString(); } }
采集了100次,很快完成了,保存了本地文件,文件如下:
打开文件一看,分类完成.
兴高采烈地去联通网上营业厅搜索,结果发现搜不到抓取到的号码,也就是说这些号码被筛选掉了(喷血).而后查看了它网页的实现,又发现有元素记录着号码信息,也就是numid;也尝试了提交订单,提交时会用号码id做校验.困了,也不蛋疼了,睡觉!
<p style="display:none;" id="numInfo" numid="numIdVal18515291341" num="185 1529 1341" price="<span>¥0</span> " monfee="0" nummemo="号码要求月承诺消费0元" numprefee="0" numisnicerule="0" numlevel="0" montime="0"></p>