自己编写了个分词查询xml,跟大家分享一下,我是用MyEclipse编写的
用这四个类之前你要先在工程里导入三个jar包:
je-analysis-1.5.3.jar包
lucene-core-2.4.0.jar包
dom4j-1.6.1.jar包
提醒你一下最好跟我用的包是一个版本的,不然我不保证程序不会有错哦!!
一共四个类:
第一个类是用来分词的:
fenci.java
package com.hou;
import java.io.IOException;
import java.util.ArrayList;
import jeasy.analysis.MMAnalyzer;
/**
 * Splits a piece of text into words using the je-analysis {@code MMAnalyzer}.
 */
public class fenci {
    /** Separator the analyzer is asked to place between words. */
    private static final char SEPARATOR = '|';

    public fenci() {}

    /**
     * Segments {@code text} into words.
     *
     * <p>Each call returns a fresh list, so calling this method repeatedly on the same
     * instance no longer mixes the words of earlier calls into later results.
     *
     * @param text the text to segment; {@code null} or empty yields an empty list
     * @return the non-empty words in the order the analyzer produced them; empty (never
     *         {@code null}) when there is nothing to segment or the analyzer fails
     */
    public ArrayList<String> fen(String text) {
        ArrayList<String> words = new ArrayList<String>();
        if (text == null || text.length() == 0) {
            return words;
        }

        MMAnalyzer analyzer = new MMAnalyzer();
        String segmented;
        try {
            segmented = analyzer.segment(text, String.valueOf(SEPARATOR));
        } catch (IOException e) {
            // Best effort, as before: report the failure and hand back an empty result.
            e.printStackTrace();
            return words;
        }
        if (segmented == null) {
            return words;
        }

        StringBuilder word = new StringBuilder();
        for (int i = 0; i < segmented.length(); i++) {
            char ch = segmented.charAt(i);
            if (ch != SEPARATOR) {
                word.append(ch);
                continue;
            }
            // Skip empty tokens: an empty search word would match every item downstream.
            if (word.length() > 0) {
                words.add(word.toString());
                word.setLength(0);
            }
        }
        // Keep the last word even when the analyzer output does not end with a separator.
        if (word.length() > 0) {
            words.add(word.toString());
        }
        return words;
    }
}
第二个类是用来解析XML的:
dom.java
package com.hou;
import java.io.File;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.DocumentHelper;
import org.dom4j.Element;
import org.dom4j.io.SAXReader;
/**
 * Scans the XML resource catalogue for items whose selected field contains a given text.
 */
public class dom {
    /** Location of the XML catalogue read by {@link #so(String, int)}. */
    private static final String SOURCE_PATH =
            "C:\\Documents and Settings\\Administrator\\workspace\\hou/source.xml";

    /** Number of matches found by the most recent successful {@link #so(String, int)} call. */
    int Count=0;

    public dom() {}

    /**
     * Returns every child of the document root whose selected field contains {@code text}.
     *
     * <p>The number of matches is no longer capped at 10000, and items that lack the
     * selected child element are skipped instead of causing a
     * {@code NullPointerException}.
     *
     * @param text         the substring to look for
     * @param selectnumber which field to search: 0 or 1 = title, 2 = keywords, 3 = kind,
     *                     4 = describe, 5 = date, 6 = author, 7 = publisher; any other
     *                     value compares against an empty string
     * @return the matches in document order, or {@code null} when nothing matched
     * @throws DocumentException if the XML file cannot be read or parsed
     */
    public result[] so(String text,int selectnumber) throws DocumentException {
        SAXReader reader = new SAXReader();
        Document document = reader.read(new File(SOURCE_PATH));
        Element rootElement = document.getRootElement();

        String fieldName = fieldFor(selectnumber);
        List<result> matches = new ArrayList<result>();
        for (Iterator<?> it = rootElement.elementIterator(); it.hasNext();) {
            Element item = (Element) it.next();
            // Unknown selectors keep the original behaviour of searching an empty string.
            String value = (fieldName == null) ? "" : item.elementText(fieldName);
            // value may be null when the item has no such child element; treat as no match.
            if (value != null && value.indexOf(text) != -1) {
                matches.add(toResult(item));
            }
        }

        Count = matches.size();
        if (matches.isEmpty()) {
            // Callers rely on null (together with getcount() == 0) to mean "no match".
            return null;
        }
        return matches.toArray(new result[matches.size()]);
    }

    /**
     * Returns the number of matches found by the last successful {@link #so(String, int)} call.
     */
    public int getcount() {
        return Count;
    }

    /** Maps a selector number to the child element name to search, or null if unknown. */
    private static String fieldFor(int selectnumber) {
        switch (selectnumber) {
            case 0:
            case 1:
                return "title";
            case 2:
                return "keywords";
            case 3:
                return "kind";
            case 4:
                return "describe";
            case 5:
                return "date";
            case 6:
                return "author";
            case 7:
                return "publisher";
            default:
                return null;
        }
    }

    /** Copies the child element texts of one catalogue item into a new result object. */
    private static result toResult(Element item) {
        result hit = new result();
        hit.setbd(true);
        hit.setauthor(item.elementText("author"));
        hit.setdate(item.elementText("date"));
        hit.setdescribe(item.elementText("describe"));
        hit.setkeywords(item.elementText("keywords"));
        hit.setkind(item.elementText("kind"));
        hit.setpublisher(item.elementText("publisher"));
        hit.settitle(item.elementText("title"));
        hit.seturl(item.elementText("url"));
        return hit;
    }
}
第三个类:
大家一定对上面的result类有疑问吧,(*^__^*) 嘻嘻……!!是我自己定义的!!!它是用来保存从xml中搜索出来的东西的,不用急,最后我会把XML也贴出来的!!!
result.java
package com.hou;
/**
 * Plain holder for one item found in the XML catalogue.
 *
 * <p>All text fields start out as the empty string. The fields stay public so that
 * existing code reading them directly keeps working.
 */
public class result {
    /**
     * Flag that defaults to {@code true}.
     * NOTE(review): its intended meaning is not evident from this class; confirm with callers.
     */
    public boolean bd = true;
    public String title = "";
    public String keywords = "";
    public String kind = "";
    public String describe = "";
    public String date = "";
    public String url = "";
    public String publisher = "";
    public String author = "";

    public result() {}

    // ---- setters: assign a value to each field ----

    /** Sets the {@code bd} flag. */
    public void setbd(boolean bd) {
        this.bd = bd;
    }

    /** Sets the title. */
    public void settitle(String title) {
        this.title = title;
    }

    /** Sets the keywords. */
    public void setkeywords(String keywords) {
        this.keywords = keywords;
    }

    /** Sets the kind. */
    public void setkind(String kind) {
        this.kind = kind;
    }

    /** Sets the description. */
    public void setdescribe(String describe) {
        this.describe = describe;
    }

    /** Sets the date text. */
    public void setdate(String date) {
        this.date = date;
    }

    /** Sets the URL. */
    public void seturl(String url) {
        this.url = url;
    }

    /** Sets the publisher. */
    public void setpublisher(String publisher) {
        this.publisher = publisher;
    }

    /** Sets the author. */
    public void setauthor(String author) {
        this.author = author;
    }

    // ---- getters: read each field ----

    /** Returns the {@code bd} flag. */
    public boolean getbd() {
        return bd;
    }

    /** Returns the title. */
    public String gettitle() {
        return title;
    }

    /** Returns the keywords. */
    public String getkeywords() {
        return keywords;
    }

    /** Returns the kind. */
    public String getkind() {
        return kind;
    }

    /** Returns the description. */
    public String getdescribe() {
        return describe;
    }

    /** Returns the date text. */
    public String getdate() {
        return date;
    }

    /** Returns the URL. */
    public String geturl() {
        return url;
    }

    /** Returns the publisher. */
    public String getpublisher() {
        return publisher;
    }

    /** Returns the author. */
    public String getauthor() {
        return author;
    }
}
第四个类:
用来获得你所要解析的词,还有一个int sn,它是一个整数值,用来指定在XML的哪个字段里查找(0和1查title,2查keywords,3查kind,4查describe,5查date,6查author,7查publisher),它的取值是从0-7,如果你只是想看看我的程序,那你随便输这其中的一个值就OK!!
search.java
package com.hou;
import java.util.ArrayList;
import org.dom4j.DocumentException;
import com.hou.*;
/**
 * Searches the catalogue for a phrase, falling back to word-by-word search when the
 * whole phrase matches nothing.
 */
public class search {
    /**
     * Minimum length of the array returned by {@link #allresult(String, int)}. The
     * original implementation always returned 10000 slots, so that shape is preserved.
     */
    private static final int MIN_RESULT_SLOTS = 10000;

    /** Number of valid entries produced by the last successful {@link #allresult(String, int)} call. */
    int Allcount=0;

    public search() {}

    /**
     * Looks up {@code tt} with {@code dom.so}. If the whole phrase matches nothing, the
     * phrase is split with {@code fenci.fen} and every non-empty word is looked up on
     * its own. The per-word matches are concatenated in word order, so an item matched
     * by several words appears once per matching word.
     *
     * <p>The combined matches are no longer limited to 10000 entries. Empty words from
     * the segmenter are ignored, because an empty search text matches every item.
     *
     * @param tt the phrase to search for
     * @param sn field selector, passed unchanged to {@code dom.so}
     * @return {@code null} when nothing matched; otherwise an array whose first
     *         {@link #getallcount()} entries are the matches and whose remaining slots
     *         (the array has at least 10000 slots) hold blank {@code result} objects
     * @throws DocumentException if the XML catalogue cannot be read or parsed
     */
    public result[] allresult(String tt,int sn) throws DocumentException {
        ArrayList<result> found = new ArrayList<result>();

        dom phraseDom = new dom();
        result[] phraseHits = phraseDom.so(tt, sn);
        appendMatches(found, phraseHits, phraseDom.getcount());

        if (found.isEmpty()) {
            fenci fc = new fenci();
            ArrayList<String> words = fc.fen(tt);
            for (int i = 0; i < words.size(); i++) {
                String word = words.get(i);
                if (word == null || word.length() == 0) {
                    continue;
                }
                dom wordDom = new dom();
                result[] wordHits = wordDom.so(word, sn);
                appendMatches(found, wordHits, wordDom.getcount());
            }
        }

        Allcount = found.size();
        if (found.isEmpty()) {
            return null;
        }

        result[] all = new result[Math.max(MIN_RESULT_SLOTS, found.size())];
        for (int i = 0; i < all.length; i++) {
            // Slots past the matches are filled with blank objects, as the original did.
            all[i] = (i < found.size()) ? found.get(i) : new result();
        }
        return all;
    }

    /**
     * Returns the number of valid entries produced by the last successful
     * {@link #allresult(String, int)} call.
     */
    public int getallcount() {
        return Allcount;
    }

    /** Appends the first {@code count} entries of {@code hits}; {@code hits} may be null when count is 0. */
    private static void appendMatches(ArrayList<result> target, result[] hits, int count) {
        for (int i = 0; i < count; i++) {
            target.add(hits[i]);
        }
    }
}
当然还有一个XML文件喽!!
<?xml version="1.0" encoding="UTF-8"?>
<!-- edited with XML Spy v4.4 U (http://www.xmlspy.com) by Mars_Michael (Mars) -->
<!DOCTYPE allresource PUBLIC "-//Mars Michael" "resourcesize.dtd">
<allresource>
<resourceitem>
<title>火影忍者346,在线观看-56.com专辑</title>
<keywords>火影忍者346</keywords>
<kind>视屏</kind>
<describe>标题:火影忍者346视频数:285人气:类别:动漫游戏关键词:Raj Kai火影忍者创建时间:2008-11-05 20:43:10专辑介绍: 火影忍者 专辑的视频(共285个)顺序排序倒序排序智能排序 123456789>>> 火影忍者345作者:q409731535 火影忍者344作者:... </describe>
<date>2009-9-7 15:34:43</date>
<url>http://www.56.com/w58/album-aid-6785496.htmlc</url>
<author>56</author>
<publisher>56.com</publisher>
</resourceitem>
<resourceitem>
<title>09计算机等级考试</title>
<keywords>计算机</keywords>
<kind>doc</kind>
<describe>09全国计算机等级考试</describe>
<date>2009-09-7 15:34:43</date>
<url>http://www.qwjsjdj.com/09jsj.doc</url>
<author>发改委</author>
<publisher>国家教考中心</publisher>
</resourceitem>
<resourceitem>
<title>07各地高考作文题</title>
<keywords>作文,高考</keywords>
<kind>doc</kind>
<describe>07各地高考作文题</describe>
<date>2007-12-1 8:50:43</date>
<url>http://www.qilu.com/Query/source/07各地高考作文题.doc</url>
<author>李明</author>
<publisher>山东师大</publisher>
</resourceitem>
<resourceitem>
<title>汽车动画</title>
<keywords>汽车,动画</keywords>
<kind>wmv</kind>
<describe>学生作品</describe>
<date>2007-12-1 8:50:43</date>
<url>www.qilu.com/Query/source/jc.wmv</url>
<author>王朋</author>
<publisher>山东师大</publisher>
</resourceitem>
<resourceitem>
<title>稻香</title>
<keywords>稻香,周杰伦</keywords>
<kind>mp3</kind>
<describe>音乐歌曲专辑名:魔杰座</describe>
<date>2009-09-7 19:34:30</date>
<url>http://yili.qq.com/upload/8927269201241972138_1.mp3</url>
<author>周杰伦</author>
<publisher>QQ音乐</publisher>
</resourceitem>
<resourceitem>
<title>彩虹</title>
<keywords>彩虹,周杰伦</keywords>
<kind>wma</kind>
<describe>音乐歌曲专辑名:我很忙</describe>
<date>2009-09-7 19:34:30</date>
<url>http://dj7.dj116.com/zwwma/0807/16/8.wma</url>
<author>周杰伦</author>
<publisher>QQ音乐</publisher>
</resourceitem>
<resourceitem>
<title>给我一首歌的时间</title>
<keywords>时间,周杰伦</keywords>
<kind>mp3</kind>
<describe>音乐歌曲专辑名:我很忙</describe>
<date>2009-09-7 19:34:30</date>
<url>http://cdn1-43.projectplaylist.com/e1/files/cdn6/mp3_new/2631213.mp3</url>
<author>周杰伦</author>
<publisher>QQ音乐</publisher>
</resourceitem>
<resourceitem>
<title>青花瓷</title>
<keywords>青花瓷,周杰伦</keywords>
<kind>wma</kind>
<describe>音乐歌曲专辑名:我很忙</describe>
<date>2009-09-7 19:34:30</date>
<url>http://www.slsd.com.cn/EBUpFileFolder/2008125172321132.wma</url>
<author>周杰伦</author>
<publisher>QQ音乐</publisher>
</resourceitem>
<resourceitem>
<title>夜曲</title>
<keywords>夜曲,周杰伦</keywords>
<kind>mp3</kind>
<describe>音乐歌曲,2007世界巡回演唱会</describe>
<date>2009-09-7 19:34:30</date>
<url>http://class.zzbz.net/class/2006/702f/%e5%a4%9c%e6%9b%b2.mp3</url>
<author>周杰伦</author>
<publisher>QQ音乐</publisher>
</resourceitem>
<resourceitem>
<title>说好的幸福呢</title>
<keywords>幸福,周杰伦</keywords>
<kind>mp3</kind>
<describe>音乐歌曲专辑名:魔杰座</describe>
<date>2009-09-7 19:34:30</date>
<url>http://cdn1-14.projectplaylist.com/e1/static12/mp3/2746839.mp3</url>
<author>周杰伦</author>
<publisher>QQ音乐</publisher>
</resourceitem>
<resourceitem>
<title>菊花台</title>
<keywords>菊花台,周杰伦</keywords>
<kind>wma</kind>
<describe>音乐歌曲,2007世界巡回演唱会</describe>
<date>2009-09-7 19:34:30</date>
<url>http://chat.hz0572.com/musicplay/music/65.wma</url>
<author>周杰伦</author>
<publisher>QQ音乐</publisher>
</resourceitem>
<resourceitem>
<title>简单爱</title>
<keywords>简单爱,周杰伦</keywords>
<kind>wma</kind>
<describe>音乐歌曲,2004无与伦比演唱会</describe>
<date>2009-09-7 19:34:30</date>
<url>http://jh2.884988.com/ge1q1u1/jiandan.wma</url>
<author>周杰伦</author>
<publisher>QQ音乐</publisher>
</resourceitem>
<resourceitem>
<title>心雨</title>
<keywords>心雨,周杰伦</keywords>
<kind>mp3</kind>
<describe>音乐歌曲专辑名:依然范特西</describe>
<date>2009-09-7 19:34:30</date>
<url>http://61.135.154.4/motomusic/upload/album/54/st_xy.mp3</url>
<author>周杰伦</author>
<publisher>QQ音乐</publisher>
</resourceitem>
<resourceitem>
<title>安静</title>
<keywords>安静,周杰伦</keywords>
<kind>mp3</kind>
<describe>音乐歌曲</describe>
<date>2009-09-7 19:34:30</date>
<url>http://www.mov8.com/dvbbs/UploadFile/dvd/toumingyu/jay/13.MP3</url>
<author>周杰伦</author>
<publisher>QQ音乐</publisher>
</resourceitem>
<resourceitem>
<title>借口</title>
<keywords>借口,周杰伦</keywords>
<kind>mp3</kind>
<describe>音乐歌曲,2004无与伦比演唱会</describe>
<date>2009-09-7 19:34:30</date>
<url>http://www.stcity.cn/mp3/006/TRACK12.MP3</url>
<author>周杰伦</author>
<publisher>QQ音乐</publisher>
</resourceitem>
<resourceitem>
<title>爱我别走</title>
<keywords>爱我别走,周杰伦</keywords>
<kind>mp3</kind>
<describe>音乐歌曲</describe>
<date>2009-09-7 19:34:30</date>
<url>http://61.144.43.233/btv/btvnew/mp3/awbz.mp3</url>
<author>周杰伦</author>
<publisher>QQ音乐</publisher>
</resourceitem>
<resourceitem>
<title>蜗牛</title>
<keywords>蜗牛,周杰伦</keywords>
<kind>mp3</kind>
<describe>音乐歌曲</describe>
<date>2009-09-7 19:34:30</date>
<url>http://www.nxyc2z.com/./images/uploadfiles/20080625111251.mp3</url>
<author>周杰伦</author>
<publisher>QQ音乐</publisher>
</resourceitem>
<resourceitem>
<title>千里之外</title>
<keywords>千里之外,周杰伦</keywords>
<kind>mp3</kind>
<describe>音乐歌曲,2007世界巡回演唱会</describe>
<date>2009-09-7 19:34:30</date>
<url>http://www.lance168.com/music/qlzhiwai.mp3</url>
<author>周杰伦</author>
<publisher>QQ音乐</publisher>
</resourceitem>
<resourceitem>
<title>甜甜的</title>
<keywords>甜甜的,周杰伦</keywords>
<kind>mp3</kind>
<describe>音乐歌曲专辑名:我很忙</describe>
<date>2009-09-7 19:34:30</date>
<url>http://yili.qq.com/upload/4448902531242205500_1.mp3</url>
<author>周杰伦</author>
<publisher>QQ音乐</publisher>
</resourceitem>
<resourceitem>
<title>退后</title>
<keywords>退后,周杰伦</keywords>
<kind>mp3</kind>
<describe>音乐歌曲,2007世界巡回演唱会</describe>
<date>2009-09-7 19:34:30</date>
<url>http://61.135.154.4/motomusic/upload/album/54/st_th.mp3</url>
<author>周杰伦</author>
<publisher>QQ音乐</publisher>
</resourceitem>
<resourceitem>
<title>对不起</title>
<keywords>对不起,周杰伦</keywords>
<kind>mp3</kind>
<describe>音乐歌曲专辑名:范特西</describe>
<date>2009-09-7 19:34:30</date>
<url>http://cdn1-84.projectplaylist.com/e1/static3/mp3/1279103.mp3</url>
<author>周杰伦</author>
<publisher>QQ音乐</publisher>
</resourceitem>
<resourceitem>
<title>不能说的秘密</title>
<keywords>秘密,周杰伦</keywords>
<kind>mp3</kind>
<describe>音乐歌曲,2007世界巡回演唱会</describe>
<date>2009-09-7 19:34:30</date>
<url>http://cdn1-26.projectplaylist.com/e1/static10/349/2398366.mp3</url>
<author>周杰伦</author>
<publisher>QQ音乐</publisher>
</resourceitem>
<resourceitem>
<title>东风破</title>
<keywords>东风,周杰伦</keywords>
<kind>mp3</kind>
<describe>音乐歌曲</describe>
<date>2009-09-7 19:34:30</date>
<url>HTTP://www.chengdeshi.com/main/mytest/upimages/2007430213311421.mp3</url>
<author>周杰伦</author>
<publisher>QQ音乐</publisher>
</resourceitem>
<resourceitem>
<title>搁浅</title>
<keywords>搁浅,周杰伦</keywords>
<kind>mp3</kind>
<describe>音乐歌曲,2004无与伦比演唱会</describe>
<date>2009-09-7 19:34:30</date>
<url>HTTP://www.chengdeshi.com/main/mytest/upimages/2007310103427625.mp3</url>
<author>周杰伦</author>
<publisher>QQ音乐</publisher>
</resourceitem>
<resourceitem>
<title>轨迹</title>
<keywords>轨迹,周杰伦</keywords>
<kind>mp3</kind>
<describe>音乐歌曲专辑名:寻找周杰伦</describe>
<date>2009-09-7 19:34:30</date>
<url>http://www.onlinejd.cn//images/轨迹.mp3</url>
<author>周杰伦</author>
<publisher>QQ音乐</publisher>
</resourceitem>
<resourceitem>
<title>七里香</title>
<keywords>七里香,周杰伦</keywords>
<kind>wma</kind>
<describe>音乐歌曲</describe>
<date>2009-09-7 19:34:30</date>
<url>http://218.56.164.7/yyt/lxgq/%E4%B8%83%E9%87%8C%E9%A6%99.wma</url>
<author>周杰伦</author>
<publisher>QQ音乐</publisher>
</resourceitem>
<resourceitem>
<title>世界末日</title>
<keywords>世界末日,周杰伦</keywords>
<kind>wma</kind>
<describe>音乐歌曲</describe>
<date>2009-09-7 19:34:30</date>
<url>http://donor.km169.net/music/200503/9356.wma</url>
<author>周杰伦</author>
<publisher>QQ音乐</publisher>
</resourceitem>
<resourceitem>
<title>听妈妈的话</title>
<keywords>妈妈,周杰伦</keywords>
<kind>mp3</kind>
<describe>音乐歌曲,2007世界巡回演唱会</describe>
<date>2009-09-7 19:34:30</date>
<url>http://74z.eqedu.net.cn/Images/听妈妈的话.mp3</url>
<author>周杰伦</author>
<publisher>QQ音乐</publisher>
</resourceitem>
</allresource>
好了,现在给你一个调用我上面几个类的小例子!!!
diao.java
package com.hou;
import java.util.ArrayList;
import org.dom4j.DocumentException;
import com.hou.*;
/**
 * Small demo: runs one search and prints the keywords and date of every hit.
 */
public class diao {
    /**
     * Searches the keywords field (selector 2) for a sample phrase and prints each hit.
     *
     * @param args unused
     * @throws DocumentException if the XML catalogue cannot be read or parsed
     */
    public static void main(String[] args) throws DocumentException {
        search a = new search();
        // No need to pre-fill an array: allresult returns its own array (or null).
        result[] hits = a.allresult("周杰伦梁静茹火影忍者", 2);
        // getallcount() is 0 when hits is null, so the loop below is safe.
        int c = a.getallcount();
        for (int i = 0; i < c; i++) {
            System.out.println(hits[i].getkeywords());
            System.out.println(hits[i].getdate());
        }
    }
}
如果你没有看懂,OK!!把文件hou.jar给你哦!! 把整个在MyEclipse中打开!!!运行一下diao.java就OK!!