Lucene3.6总结篇


1、合并索引库片段文件
IndexWriter的optimize()方法已经过时,因为这个方法的效率很低。合并文件主要是使用IndexWriter的setMergeFactor(int)方法,但是在Lucene3.6版本中,该方法已过时,直接使用LogMergePolicy.setMergeFactor(int)方法代替。
当setMergeFactor(int)的参数值较小的时候,创建索引的速度较慢。当参数值较大的时候,创建索引的速度就比较快。大于10适合批量创建索引。


示例代码

//合并索引文件
	/**
	 * Indexes four sample books using a {@code LogByteSizeMergePolicy} whose
	 * merge factor is 3.
	 * <p>
	 * The writer is closed (committing the documents) on success and rolled
	 * back on failure, and the directory is always closed, so a failed run does
	 * not leave the index write lock held or a half-written batch committed.
	 * I/O failures are printed to stderr, as in the rest of these examples.
	 */
	@Test
	public void testMergeFactor() {

		try {
			String path = "D:\\LuceneEx\\day03";
			// Alternative: an "indexDir" directory under the current working directory.
			// File indexDir = new File("./indexDir");
			Directory mDirectory = FSDirectory.open(new File(path));
			try {
				Analyzer mAnalyzer = new IKAnalyzer();
				IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_36,
						mAnalyzer);

				LogByteSizeMergePolicy mergePolicy = new LogByteSizeMergePolicy();

				// Further tuning knobs, unused here:
				// mergePolicy.setMaxMergeDocs(maxMergeDocs);
				// mergePolicy.setMaxMergeMB(mb);
				mergePolicy.setMergeFactor(3); // merge once 3 segments have accumulated
				config.setMergePolicy(mergePolicy);

				IndexWriter mIndexWriter = new IndexWriter(mDirectory, config);
				boolean closed = false;
				try {
					Book book1 = createBook("Android内核揭秘", "ABC", "2010-07",
							"android 移动开发", 8.9f);
					Document doc1 = createDocument(book1);

					Book book2 = createBook("Android多媒体开发", "BCD", "2011-07",
							"android 多媒体", 8.5f);
					Document doc2 = createDocument(book2);

					Book book3 = createBook("Android企业应用开发", "QAB", "2012-05",
							"android 企业应用", 8.2f);
					Document doc3 = createDocument(book3);
					// The boost is a multiplier on the document's score: 1.5F raises
					// it by half (2F would double it).
					doc3.setBoost(1.5F);

					Book book4 = createBook("Android内核剖析", "WPS", "2012-09",
							"android 驱动开发", 9.8f);
					Document doc4 = createDocument(book4);

					mIndexWriter.addDocument(doc1);
					mIndexWriter.addDocument(doc2);
					mIndexWriter.addDocument(doc3);
					mIndexWriter.addDocument(doc4);

					mIndexWriter.close();
					closed = true;
				} finally {
					if (!closed) {
						// Discard the uncommitted batch and release the write lock.
						mIndexWriter.rollback();
					}
				}
			} finally {
				mDirectory.close();
			}
		} catch (IOException e) {
			// CorruptIndexException and LockObtainFailedException are both
			// IOException subclasses, so this single handler covers them.
			e.printStackTrace();
		}
	}

	/**
	 * Converts a {@code Book} into a Lucene {@code Document}.
	 * <p>
	 * Seven fields are added, in this order: id, title, author, publishTime,
	 * source, category and reputation. Every field is both stored and
	 * analyzed; the id and reputation are indexed as their string form.
	 *
	 * @param book the book whose properties populate the document
	 * @return a new document holding one field per book property
	 */
	public Document createDocument(Book book) {
		Document bookDoc = new Document();

		bookDoc.add(new Field("id", String.valueOf(book.getId()),
				Store.YES, Index.ANALYZED));
		bookDoc.add(new Field("title", book.getTitle(),
				Store.YES, Index.ANALYZED));
		bookDoc.add(new Field("author", book.getAuthor(),
				Store.YES, Index.ANALYZED));
		bookDoc.add(new Field("publishTime", book.getPublishTime(),
				Store.YES, Index.ANALYZED));
		bookDoc.add(new Field("source", book.getSource(),
				Store.YES, Index.ANALYZED));
		bookDoc.add(new Field("category", book.getCategory(),
				Store.YES, Index.ANALYZED));
		bookDoc.add(new Field("reputation", String.valueOf(book.getReputation()),
				Store.YES, Index.ANALYZED));

		return bookDoc;
	}

	/**
	 * Builds a sample {@code Book} from the given properties.
	 * <p>
	 * The id is a random integer in the range [0, 10000) and the source is
	 * always "清华大学出版社".
	 *
	 * @param title       the book title
	 * @param author      the author name
	 * @param publishTime the publication time, passed through as given
	 * @param category    the category text
	 * @param reputation  the rating of the book
	 * @return the populated book
	 */
	public Book createBook(String title, String author, String publishTime,
			String category, float reputation) {

		Book sample = new Book();

		// Random id, not guaranteed to be unique across calls.
		sample.setId(new Random().nextInt(10000));
		sample.setAuthor(author);
		sample.setTitle(title);
		sample.setCategory(category);
		sample.setPublishTime(publishTime);
		sample.setReputation(reputation);
		sample.setSource("清华大学出版社");

		return sample;
	}



2、内存索引目录和文件系统索引目录结合使用
内存索引目录的操作速度非常快,所以我们在操作索引的时候可以把索引库从文件系统加载到内存中,操作完成后再写回文件系统。
把内存中的索引文件写回到文件系统的时候,我们需要对索引目录进行重建。比如原来文件系统中的索引目录有10个文件,加载到内存目录的时候是把这10个文件拷贝一份到内存,然后我们添加了一个索引文件,内存中的索引目录文件数就变成11个。写回到文件系统的时候,内存索引目录的文件数(11个)加上原来文件系统索引目录的文件数(10个)就变成21个了,其中有10个文件是重复的,所以我们需要删除原来文件系统中的索引目录并重新创建。
但是如果索引库是巨大的,不建议使用,因为所需的内存很大。


示例代码

/**
	 * Loads the file-system index into memory, adds two sample books there,
	 * then rebuilds the file-system index from the in-memory copy.
	 * <p>
	 * Both directories are always closed. Each writer is closed on success and
	 * rolled back on failure, so a failed run neither keeps the write lock nor
	 * replaces the on-disk index with a partial one. I/O failures are printed
	 * to stderr.
	 */
	@Test
	public void testRAMFSCombination() {

		try {
			String path = "D:\\LuceneEx\\day03";
			Directory mDirectory = FSDirectory.open(new File(path));
			try {
				Analyzer mAnalyzer = new IKAnalyzer();

				// In-memory directory initialised with a copy of the on-disk index.
				RAMDirectory mRamDirectory = new RAMDirectory(mDirectory);
				try {
					addSampleBooksInMemory(mRamDirectory, mAnalyzer);
					overwriteIndex(mDirectory, mRamDirectory, mAnalyzer);
				} finally {
					mRamDirectory.close();
				}
			} finally {
				mDirectory.close();
			}
		} catch (IOException e) {
			e.printStackTrace();
		}
	}

	/** Adds the two sample books to the in-memory index and commits them. */
	private void addSampleBooksInMemory(Directory ramDirectory, Analyzer analyzer)
			throws IOException {
		IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_36, analyzer);
		IndexWriter ramIndexWriter = new IndexWriter(ramDirectory, config);
		boolean closed = false;
		try {
			Book book1 = createBook("Spring企业开发", "QWA", "2012-07",
					"spring web开发", 8.5f);
			Document doc1 = createDocument(book1);

			Book book2 = createBook("精通SSH三大框架", "SSH", "2012-11",
					"web开发  SSH", 9.1f);
			Document doc2 = createDocument(book2);

			ramIndexWriter.addDocument(doc1);
			ramIndexWriter.addDocument(doc2);

			ramIndexWriter.close();
			closed = true;
		} finally {
			if (!closed) {
				ramIndexWriter.rollback();
			}
		}
	}

	/** Replaces the index in {@code target} with the contents of {@code source}. */
	private void overwriteIndex(Directory target, Directory source, Analyzer analyzer)
			throws IOException {
		IndexWriterConfig fsIndexWriterConfig = new IndexWriterConfig(Version.LUCENE_36, analyzer);
		// CREATE starts a fresh index, overwriting the existing one on commit;
		// appending instead would duplicate every document copied into memory.
		fsIndexWriterConfig.setOpenMode(OpenMode.CREATE);

		IndexWriter fsIndexWriter = new IndexWriter(target, fsIndexWriterConfig);
		boolean closed = false;
		try {
			fsIndexWriter.addIndexes(source);
			fsIndexWriter.close();
			closed = true;
		} finally {
			if (!closed) {
				// Roll back so a failed copy does not commit over the old index.
				fsIndexWriter.rollback();
			}
		}
	}





你可能感兴趣的:(索引优化,lucene3.6,索引合并)