This post uses HttpClient to query both Google's online dictionary and iciba (爱词霸), processes the returned HTML with HTMLParser, and thereby implements a small online-translation utility, using multithreaded programming to combine the two translation services. The implementation follows.
1. multithreadDict.java

package MultiTread;

import java.util.Scanner;

public class multithreadDict {
    public static void main(String[] args) throws InterruptedException {
        System.out.print("Input the word > ");
        Scanner s = new Scanner(System.in);
        String input = s.nextLine();

        GoogleDict google = new GoogleDict(input);
        google.start();
        //google.sleep(2000);
        IcibaDict iciba = new IcibaDict(input);
        iciba.start();
        //iciba.sleep(4000);
    }
}
Note: I originally wanted the Google result displayed first and the iciba result second, but even with sleep() that ordering is hard to guarantee.
Thread scheduling is at the core of Java multithreading: only with proper scheduling can the system's capacity be fully used and the program run efficiently.
Sleeping is one of the simplest ways for a thread to yield the CPU. While a thread sleeps, the CPU is handed to other threads so execution can alternate; once the interval elapses, the thread wakes up and re-enters the runnable state, waiting to be scheduled again.
The sleep methods are Thread.sleep(long millis) and Thread.sleep(long millis, int nanos); both are static. Simply put, whichever thread calls sleep is the one that sleeps. That is also why the commented-out google.sleep(2000) above would have paused the main thread rather than the Google thread. A sketch of a deterministic alternative using join() follows.
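Here is a minimal sketch of that alternative: Thread.join() makes the main thread wait until the Google thread has finished printing before the iciba thread is even started. The two downloads no longer overlap, but the ordering is deterministic, unlike with sleep(). It reuses the GoogleDict and IcibaDict classes defined below; the class name multithreadDictOrdered is just an illustrative variant, not part of the original program.

package MultiTread;

import java.util.Scanner;

public class multithreadDictOrdered {
    public static void main(String[] args) throws InterruptedException {
        System.out.print("Input the word > ");
        Scanner s = new Scanner(System.in);
        String input = s.nextLine();

        GoogleDict google = new GoogleDict(input);
        google.start();
        google.join();   // block here until the Google thread has printed everything

        IcibaDict iciba = new IcibaDict(input);
        iciba.start();
        iciba.join();    // likewise, so main exits only after iciba is done
    }
}

A Future-based variant that keeps the two requests concurrent while still printing in a fixed order is sketched at the end of the post.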
2. GoogleDict.java
package MultiTread;

import java.io.File;
import java.io.FileWriter;
import java.net.URI;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.utils.URIUtils;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.util.EntityUtils;
import org.htmlparser.Node;
import org.htmlparser.NodeFilter;
import org.htmlparser.Parser;
import org.htmlparser.filters.OrFilter;
import org.htmlparser.filters.TagNameFilter;
import org.htmlparser.util.NodeList;
import org.htmlparser.visitors.TextExtractingVisitor;

class GoogleDict extends Thread {
    private String searchterm = null;

    public GoogleDict(String input) {
        this.searchterm = input;
    }

    public void run() {
        // text starts as null, so the first concatenation yields a leading
        // "null"; the replaceAll("null", "") below strips it.
        String text = null;
        // Example query:
        // http://www.google.com/dictionary?source=translation&hl=zh-CN&q=computer&langpair=en|zh-CN
        try {
            HttpClient httpclient = new DefaultHttpClient();
            String searchstring = "source=translation&hl=zh-CN&q=" + searchterm + "&langpair=en%7Czh-CN";
            URI uri = URIUtils.createURI("http", "www.google.com", -1, "/dictionary", searchstring, null);
            HttpGet httpget = new HttpGet(uri);
            HttpResponse response = httpclient.execute(httpget);
            HttpEntity entity = response.getEntity();

            if (entity != null) {
                Parser parser = new Parser(EntityUtils.toString(entity));
                parser.setEncoding("gb2312");
                //NodeFilter filter_tab_content = new OrFilter(new TagNameFilter("div"), new TagNameFilter("span"));
                NodeFilter filter_tab_content = new TagNameFilter("div");
                //NodeFilter filter_tab_content = new TagNameFilter("span");
                NodeList nodelist_tab_content = parser.parse(filter_tab_content);
                int length = nodelist_tab_content.size();
                // English input is pure ASCII, so its byte length equals its
                // char length; Chinese input takes the other branch, where the
                // result page has a slightly different layout.
                if (searchterm.getBytes().length == searchterm.length()) {
                    for (int i = 10; i < length - 3; i++) {
                        Node node_tab_content = nodelist_tab_content.elementAt(i);
                        Parser parser_tab_content = new Parser(node_tab_content.toHtml());
                        TextExtractingVisitor visitor_tab_content = new TextExtractingVisitor();
                        parser_tab_content.visitAllNodesWith(visitor_tab_content);
                        text = text + "\n" + visitor_tab_content.getExtractedText().trim();
                    }
                } else {
                    for (int i = 8; i < length - 3; i++) {
                        Node node_tab_content = nodelist_tab_content.elementAt(i);
                        Parser parser_tab_content = new Parser(node_tab_content.toHtml());
                        TextExtractingVisitor visitor_tab_content = new TextExtractingVisitor();
                        parser_tab_content.visitAllNodesWith(visitor_tab_content);
                        text = text + "\n" + visitor_tab_content.getExtractedText().trim();
                    }
                }
                text = text.replaceAll("相关搜索", "相关搜索:");
                text = text.replaceAll("null", "");
                // Applied repeatedly to collapse longer runs of blank lines.
                text = text.replaceAll("\n\n", "\n");
                text = text.replaceAll("\n\n", "\n");
                text = text.replaceAll("\n\n", "\n");

                System.out.println("-----------------------------------------" +
                        "谷歌翻译-------------------------------------------");
                System.out.println(uri);

                System.out.println(text);
                // Assumes this directory already exists.
                File f = new File("D:/study/Java/GoogleDict/" + searchterm + ".txt");
                FileWriter fw = new FileWriter(f);
                fw.write(text);
                fw.flush();
                fw.close();
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
Note: when processing the page returned by Google Translate with HTMLParser, the markup mixes <span>...</span>, <div>...</div>, and nested <span><div>...</div></span>, which makes extraction awkward. I am not very familiar with HTMLParser, so the final output is not entirely satisfactory; one possible workaround is sketched below.
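The sketch, untested against the live page layout, keeps the OrFilter over both tag names (the line commented out in the code above) and then skips any matched node that sits inside another matched node, so text nested as <span><div>...</div></span> is extracted only once. It relies on Node.getParent() walking up the tree HTMLParser builds during parse(); the helper class OutermostText is hypothetical, not part of the original program.

package MultiTread;

import org.htmlparser.Node;
import org.htmlparser.NodeFilter;
import org.htmlparser.Parser;
import org.htmlparser.Tag;
import org.htmlparser.filters.OrFilter;
import org.htmlparser.filters.TagNameFilter;
import org.htmlparser.util.NodeList;
import org.htmlparser.visitors.TextExtractingVisitor;

class OutermostText {
    // Print the text of each outermost <div>/<span> exactly once.
    static void print(String html) throws Exception {
        NodeFilter divOrSpan = new OrFilter(new TagNameFilter("div"), new TagNameFilter("span"));
        NodeList matches = new Parser(html).parse(divOrSpan);
        for (int i = 0; i < matches.size(); i++) {
            Node node = matches.elementAt(i);
            // Skip nodes nested inside another matched tag: their text is
            // already covered when the enclosing <div>/<span> is extracted.
            boolean nested = false;
            for (Node p = node.getParent(); p != null; p = p.getParent()) {
                if (p instanceof Tag) {
                    String name = ((Tag) p).getTagName();
                    if ("DIV".equalsIgnoreCase(name) || "SPAN".equalsIgnoreCase(name)) {
                        nested = true;
                        break;
                    }
                }
            }
            if (!nested) {
                TextExtractingVisitor visitor = new TextExtractingVisitor();
                new Parser(node.toHtml()).visitAllNodesWith(visitor);
                System.out.println(visitor.getExtractedText().trim());
            }
        }
    }
}

The same deduplication idea works for any set of overlapping container tags, not just div and span.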
3. IcibaDict.java
package MultiTread;

import java.io.File;
import java.io.FileWriter;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.util.EntityUtils;
import org.htmlparser.Node;
import org.htmlparser.NodeFilter;
import org.htmlparser.Parser;
import org.htmlparser.filters.AndFilter;
import org.htmlparser.filters.HasAttributeFilter;
import org.htmlparser.filters.TagNameFilter;
import org.htmlparser.util.NodeList;
import org.htmlparser.visitors.TextExtractingVisitor;

class IcibaDict extends Thread {
    private String searchterm = null;

    public IcibaDict(String input) {
        this.searchterm = input;
    }

    public void run() {
        String text = null, webContent = null;
        try {
            HttpClient httpclient = new DefaultHttpClient();
            String searchstring = "http://www.iciba.com/" + searchterm + "/";
            HttpGet httpget = new HttpGet(searchstring);
            HttpResponse response = httpclient.execute(httpget);
            HttpEntity entity = response.getEntity();
            if (entity != null) {
                String content = EntityUtils.toString(entity);
                // Insert a space before each link so extracted text does not run together.
                content = content.replaceAll("<a href", " <a href");
                Parser parser = new Parser(content);
                parser.setEncoding("gb2312");
                NodeFilter filter_tab_content = new AndFilter(new TagNameFilter("div"),
                        new HasAttributeFilter("class", "tab_content"));
                NodeList nodelist_tab_content = parser.parse(filter_tab_content);
                int length = nodelist_tab_content.size();
                for (int i = 0; i < length; i++) {
                    Node node_tab_content = nodelist_tab_content.elementAt(i);
                    Parser parser_tab_content = new Parser(node_tab_content.toHtml());
                    TextExtractingVisitor visitor_tab_content = new TextExtractingVisitor();
                    parser_tab_content.visitAllNodesWith(visitor_tab_content);
                    text = text + "\n" + visitor_tab_content.getExtractedText().trim();
                }
                parser.reset();
                NodeFilter filter_web = new AndFilter(new TagNameFilter("div"),
                        new HasAttributeFilter("class", "content_block"));
                NodeList nodelist_web = parser.parse(filter_web);
                Node node_web = nodelist_web.elementAt(0);
                if (node_web != null) {
                    Parser parser_web = new Parser(node_web.toHtml());
                    TextExtractingVisitor visitor_web = new TextExtractingVisitor();
                    parser_web.visitAllNodesWith(visitor_web);
                    webContent = visitor_web.getExtractedText().trim();
                }
                text = text + webContent;
                // NOTE: several replaceAll targets below were most likely
                // distinct whitespace entities (&nbsp;, full-width spaces)
                // that the original blog formatting flattened into plain
                // spaces; they are kept verbatim here.
                text = text.replaceAll(" ", "");
                text = text.replaceAll(" ", "");
                text = text.replaceAll(" ", "\n");
                text = text.replaceAll("\n\n\n", "\n");
                text = text.replaceAll("\n\n", "\n");
                text = text.replaceAll("\n\n", "\n");
                text = text.replaceAll(" ", "");
                text = text.replace("null", "");
                text = text.replace("相关搜索", "");
                text = text.replace("句库", "");
                text = text.replace("韦氏词典", "");
                text = text.replace("Dictionary", "");

                System.out.println("*************************************" +
                        "爱词霸翻译*************************************");
                System.out.println(searchstring);

                System.out.println(text);

                // Assumes this directory already exists.
                File f = new File("D:/study/Java/IcibaDict/" + searchterm + ".txt");
                FileWriter fw = new FileWriter(f);
                fw.write(text);
                fw.flush();
                fw.close();
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
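Finally, back to the ordering problem from the first section: if the two lookups should run in parallel and still print Google first, a cleaner merge than sleep() or join() is to have each worker return its text instead of printing inside run(), then read the Futures in a fixed order. The sketch below assumes the extraction code above is refactored into methods that return strings; fetchGoogle and fetchIciba are hypothetical placeholder names.

package MultiTread;

import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

public class orderedDict {
    public static void main(String[] args) throws Exception {
        final String input = "computer"; // or read from Scanner as above
        ExecutorService pool = Executors.newFixedThreadPool(2);

        // Both HTTP requests run concurrently...
        Future<String> google = pool.submit(new Callable<String>() {
            public String call() throws Exception {
                return fetchGoogle(input); // hypothetical: GoogleDict logic returning text
            }
        });
        Future<String> iciba = pool.submit(new Callable<String>() {
            public String call() throws Exception {
                return fetchIciba(input);  // hypothetical: IcibaDict logic returning text
            }
        });

        // ...but get() enforces the display order: Google first, iciba second.
        System.out.println(google.get());
        System.out.println(iciba.get());
        pool.shutdown();
    }

    // Placeholders standing in for the refactored thread bodies above.
    static String fetchGoogle(String term) { return "..."; }
    static String fetchIciba(String term) { return "..."; }
}

get() blocks until its own result is ready, so the console order is fixed even though the two downloads overlap.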