public void createIndex1(Connection conn, AttachInfoDTO1 main) throws SQLException {
    IndexWriter writer = null;
    Directory directory = null;
    InputStream br = null;
    // Index folder: resolve "PICCAssets/index" under the webapps directory
    File dir = new File("PICCAssets/index");
    String path = dir.getAbsolutePath();
    String path1 = path.replace("bin", "webapps");
    try {
        directory = FSDirectory.open(new File(path1));
        writer = new IndexWriter(directory,
                new IndexWriterConfig(Version.LUCENE_35, new IKAnalyzer()));
        if (main != null) {
            String dname = main.getFileUrl();
            String rname = "http://192.168.1.10:8080/PICCAssets/" + dname;
            FtpTask ftp = new FtpTask();
            String path11 = dname.substring(0, dname.lastIndexOf("/") + 1);
            String file = dname.substring(dname.lastIndexOf("/") + 1);
            ftp.connectServer(path11);
            // NOTE: the original statement that opened the attachment stream from the
            // FTP server is garbled in the source; "file" is the remote file name to fetch.
            br = null;
            String author = main.getStaffName();
            if (author == null || "".equals(author)) {
                author = "系统管理员"; // default author: system administrator
            }
            Document doc = new Document();
            Metadata metadata = new Metadata();
            // Extract the plain text with Tika; passing the Metadata object also
            // collects document properties such as the page count.
            String content = new Tika().parseToString(br, metadata);
            doc.add(new Field("content", content, Field.Store.YES, Field.Index.ANALYZED,
                    TermVector.WITH_POSITIONS_OFFSETS));
            doc.add(new Field("title", main.getAttachName(), Field.Store.YES, Field.Index.ANALYZED,
                    TermVector.WITH_POSITIONS_OFFSETS));
            doc.add(new Field("author", author, Field.Store.YES, Field.Index.ANALYZED,
                    TermVector.WITH_POSITIONS_OFFSETS));
            doc.add(new Field("rid", Integer.toString(main.getId()), Field.Store.YES,
                    Field.Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS));
            doc.add(new Field("filename", main.getAttachName(), Field.Store.YES, Field.Index.NOT_ANALYZED));
            doc.add(new Field("path", dname, Field.Store.YES, Field.Index.NOT_ANALYZED));
            doc.add(new Field("dpath", rname, Field.Store.YES, Field.Index.NOT_ANALYZED));
            doc.add(new Field("dtype", main.getFileTypeLever1(), Field.Store.YES, Field.Index.NOT_ANALYZED));
            doc.add(new Field("date", main.getUploadDate(), Field.Store.YES, Field.Index.NOT_ANALYZED));
            int page = 0;
            try {
                // Page count from Tika metadata; currently not added to the document.
                page = Integer.parseInt(metadata.get("xmpTPg:NPages"));
            } catch (NumberFormatException e) {
                // the document may not report a page count
            }
            writer.addDocument(doc);
        }
    } catch (IOException e) {
        e.printStackTrace();
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        if (br != null) {
            try {
                br.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        if (writer != null) {
            try {
                writer.close();
            } catch (CorruptIndexException e) {
                e.printStackTrace();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}
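The deletion and search methods below obtain their index Directory from a FileIndexUtils.getDirectory() helper that is not shown in this post. A minimal sketch of what such a helper might look like, assuming it simply caches an FSDirectory over the same PICCAssets/index folder resolved in createIndex1 (the class name comes from the original code, the implementation is my assumption):

import java.io.File;
import java.io.IOException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

// Hypothetical implementation of the FileIndexUtils helper used by the methods below.
public class FileIndexUtils {

    private static Directory directory;

    // Lazily open (and cache) an FSDirectory over the shared index folder.
    public static synchronized Directory getDirectory() throws IOException {
        if (directory == null) {
            String path = new File("PICCAssets/index").getAbsolutePath().replace("bin", "webapps");
            directory = FSDirectory.open(new File(path));
        }
        return directory;
    }
}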
Delete the entire index
/**
 * Delete all index entries.
 *
 * @throws Exception
 */
public void deleteAllIndex() throws Exception {
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_35,
            new StandardAnalyzer(Version.LUCENE_35));
    IndexWriter indexWriter = new IndexWriter(FileIndexUtils.getDirectory(), config);
    indexWriter.deleteAll();
    indexWriter.close();
}
Delete a single index entry
/**
 * Delete the index entry of a single document.
 *
 * @param uid the "rid" value of the document to remove
 * @throws Exception
 */
public void deleteOneIndex(String uid) throws Exception {
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_35,
            new StandardAnalyzer(Version.LUCENE_35));
    IndexWriter indexWriter = new IndexWriter(FileIndexUtils.getDirectory(), config);
    // "rid" holds a plain numeric id, so the exact Term lookup still matches even
    // though the field was indexed as ANALYZED above.
    indexWriter.deleteDocuments(new Term("rid", uid));
    indexWriter.close();
}
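Refreshing the entry for a re-uploaded attachment can be done by combining the two methods above: delete the stale document by its rid, then index the new version with createIndex1. A short usage sketch, assuming both methods live on the same service class (called LuceneIndexService here purely for illustration):

// Hypothetical usage: refresh the index entry of one attachment.
// LuceneIndexService is an assumed name for the class holding the methods above.
public void refreshIndex(Connection conn, AttachInfoDTO1 attach) throws Exception {
    LuceneIndexService service = new LuceneIndexService();
    // Remove the stale document, matched by the stored "rid" field.
    service.deleteOneIndex(Integer.toString(attach.getId()));
    // Re-index the current version of the attachment.
    service.createIndex1(conn, attach);
}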
// Highlighted search of the documents matching the index, with a combined (boolean) query
@SuppressWarnings({ "rawtypes", "unused" })
public PagerDTO<LuceneDTO> searchPage(String key, int starttime, int endtime, String dtype,
        int pageIndex, int pageSize) {
    Analyzer luceneAnalyzer = new IKAnalyzer();
    PagerDTO<LuceneDTO> pages = new PagerDTO<LuceneDTO>();
    List<LuceneDTO> l = new ArrayList<LuceneDTO>();
    LuceneDTO bean = null;
    try {
        // Open a searcher over the index folder
        IndexSearcher searcher = new IndexSearcher(IndexReader.open(FileIndexUtils.getDirectory()));
        BooleanQuery query0 = new BooleanQuery();
        // Date range (NOTE: never added to query0 here, and it would only work if
        // "date" were indexed as a NumericField rather than the plain string used above)
        NumericRangeQuery q2 = NumericRangeQuery.newIntRange("date", starttime, endtime, true, true);
        // Keyword query over the title and content fields
        MultiFieldQueryParser parser = new MultiFieldQueryParser(Version.LUCENE_35,
                new String[] { "title", "content" }, luceneAnalyzer);
        if (key == null || "".equals(key)) {
            query0.add(new TermQuery(new Term("dtype", dtype)), Occur.SHOULD);
        } else {
            Query queryKey = parser.parse(key);
            query0.add(new TermQuery(new Term("dtype", dtype)), Occur.MUST);
            query0.add(queryKey, Occur.MUST);
        }
        // Search start time
        Date beginTime = new Date();
        // Fetch the last ScoreDoc of the previous page ...
        ScoreDoc lastSd = getLastScoreDoc(pageIndex, pageSize, query0, searcher);
        // ... then search the next pageSize hits after it
        TopDocs tds = searcher.searchAfter(lastSd, query0, pageSize);
        int totalRecord = tds.totalHits;
        for (ScoreDoc sd : tds.scoreDocs) {
            Document doc = searcher.doc(sd.doc);
            String path = doc.get("path");
            String filen = path.split("/")[2];
            String filename = filen.substring(0, filen.lastIndexOf(".")) + ".swf";
            // Content and title are stored in the index, so no remote HTTP read is needed
            String content = doc.get("content");
            String htitle = doc.get("title");
            QueryScorer scorer = new QueryScorer(query0);
            Formatter formatter = new SimpleHTMLFormatter("<span style='color:red'>", "</span>");
            Highlighter lighter = new Highlighter(formatter, scorer);
            // Limit each highlighted fragment to about 200 characters
            lighter.setTextFragmenter(new SimpleFragmenter(200));
            // getBestFragment returns null when the text contains no query term,
            // so fall back to the raw values below
            String str = lighter.getBestFragment(luceneAnalyzer, "content", content);
            String titlel = lighter.getBestFragment(luceneAnalyzer, "title", htitle);
            bean = new LuceneDTO();
            if (titlel != null) {
                bean.setTitle(titlel);
            } else {
                bean.setTitle(htitle);
            }
            if (str != null) {
                bean.setStr(str);
            } else if (content.length() > 250) {
                bean.setStr(content.substring(0, 250));
            } else {
                bean.setStr(content);
            }
            bean.setDate(doc.get("date"));
            bean.setType(doc.get("title").substring(doc.get("title").lastIndexOf(".") + 1));
            bean.setPath(doc.get("author"));
            bean.setFilename(filename);
            bean.setDpath(doc.get("dpath"));
            bean.setRid(doc.get("rid"));
            l.add(bean);
        }
        // Search finish time
        Date endTime = new Date();
        DecimalFormat format = new DecimalFormat("0.000");
        double timeOfSearch = (endTime.getTime() - beginTime.getTime()) * 0.001;
        int totalPage = (totalRecord - 1) / pageSize + 1;
        pages.setTimeOfSearch(format.format(timeOfSearch));
        pages.setTotalPage(totalPage);
        pages.setTotalRecord(totalRecord);
        pages.setPageIndex(pageIndex);
        pages.setPageSize(pageSize);
        pages.setDatas(l);
        searcher.close();
    } catch (IOException e) {
        e.printStackTrace();
    } catch (InvalidTokenOffsetsException e) {
        e.printStackTrace();
    } catch (ParseException e) {
        e.printStackTrace();
    }
    return pages;
}
/**
 * Return the last ScoreDoc of the previous page for the given page index and size.
 */
private ScoreDoc getLastScoreDoc(int pageIndex, int pageSize, Query query, IndexSearcher searcher)
        throws IOException {
    if (pageIndex == 1) {
        return null; // first page: nothing to skip
    }
    int num = pageSize * (pageIndex - 1); // number of hits on all previous pages
    TopDocs tds = searcher.search(query, num);
    if (tds.scoreDocs.length < num) {
        return null; // fewer hits than requested; avoid an out-of-bounds access
    }
    return tds.scoreDocs[num - 1];
}
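For reference, a small example of calling searchPage and rendering one result page. The getter names on PagerDTO and LuceneDTO are assumed to mirror the setters used above, and the keyword, category code and date bounds are made-up sample values:

// Hypothetical caller: page 2 of the hits for "保险" in category "01",
// with date bounds given as yyyyMMdd integers.
PagerDTO<LuceneDTO> page = searchPage("保险", 20140101, 20141231, "01", 2, 10);
System.out.println("hits=" + page.getTotalRecord()
        + " pages=" + page.getTotalPage()
        + " took=" + page.getTimeOfSearch() + "s");
for (LuceneDTO dto : page.getDatas()) {
    // Title and snippet may contain the <span> highlight markup added above.
    System.out.println(dto.getTitle() + " -> " + dto.getDpath());
}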