/*
* To change this template, choose Tools | Templates
* and open the template in the editor.
*/
package com.jd.lucene;
import java.io.File;
import java.io.IOException;
import java.sql.Date;
import java.text.Format;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.document.NumericField;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopFieldDocs;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
/**
 * Command-line demo that queries the blog index on the {@code title} field and
 * prints the top hits ordered by read count and then by date, both descending.
 */
public class Search {
    /** Directory where the Lucene index is stored. */
    private static String indexPath = "/home/mlzboy/my/crawler/index";

    /**
     * Opens the index, runs the query {@code lucene} against the {@code title}
     * field, and prints up to 100 hits followed by the total hit count.
     *
     * @param args unused
     * @throws IOException if the index cannot be opened or read
     * @throws CorruptIndexException if the index is corrupt
     * @throws ParseException if the query string cannot be parsed
     */
    public static void main(String[] args) throws CorruptIndexException, IOException, ParseException {
        FSDirectory dir = FSDirectory.open(new File(indexPath));
        try {
            IndexSearcher searcher = new IndexSearcher(dir);
            try {
                System.out.println("total blogs:" + searcher.getIndexReader().numDocs());

                Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_34);
                QueryParser parser = new QueryParser(Version.LUCENE_34, "title", analyzer);
                Query query = parser.parse("lucene");

                // Primary key: read count; secondary key: date. "true" means reversed (descending).
                Sort sort = new Sort(new SortField[] {
                    new SortField("read", SortField.INT, true),
                    new SortField("date", SortField.LONG, true)
                });

                TopFieldDocs tfd = searcher.search(query, 100, sort);
                ScoreDoc[] hits = tfd.scoreDocs;
                System.out.println(hits.length);

                // "HH" is the 24-hour clock. The pattern has no AM/PM marker, so the
                // 12-hour "hh" would print 20:29 as the ambiguous 08:29.
                Format formatter = new SimpleDateFormat("yyyy-MM-dd HH:mm");

                for (int i = 0; i < hits.length; i++) {
                    Document doc = searcher.doc(hits[i].doc);
                    System.out.print(doc.getFieldable("id") + " " + doc.getFieldable("title") + " " + hits[i].toString() + " ");
                    System.out.print("==" + hits[i].doc + "====");
                    System.out.print(doc.getFieldable("link"));
                    // The "date" field is read back as a decimal string of epoch milliseconds.
                    String s = formatter.format(Long.valueOf(Long.parseLong(doc.get("date"))));
                    System.out.print(s + " ");
                    System.out.println(Integer.parseInt(doc.get("read")));
                }
                System.out.println("Found " + tfd.totalHits);
            } finally {
                // Release the reader the searcher opened, even when the search fails.
                searcher.close();
            }
        } finally {
            dir.close();
        }
    }
}
/*
* To change this template, choose Tools | Templates
* and open the template in the editor.
*/
package com.jd.lucene;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Scanner;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
/**
 * Command-line demo that rebuilds the blog index from a comma-separated text
 * file. Each usable line has the columns
 * {@code title,link,date,read,comment}.
 */
public class Index {
    /** Directory where the Lucene index is stored. */
    private static String indexPath = "/home/mlzboy/my/crawler/index";

    /**
     * Clears the index, re-indexes every usable line of the input file, merges
     * the index down to one segment and finally deletes the document whose
     * {@code id} term is {@code 2162}.
     *
     * <p>If anything fails before the writer is closed, the writer is rolled
     * back so that a half-built index is not committed.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        try {
            Directory dir = FSDirectory.open(new File(indexPath));
            try {
                Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_34);
                IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_34, analyzer);
                iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
                // Optional: for better indexing performance when indexing many
                // documents, increase the RAM buffer (and the JVM max heap):
                // iwc.setRAMBufferSizeMB(256.0);
                IndexWriter writer = new IndexWriter(dir, iwc);
                boolean closed = false;
                try {
                    writer.deleteAll();
                    indexFile(writer, "/home/mlzboy/my/crawler/d.txt");
                    // Statement order (merge first, then delete) is kept from the original.
                    writer.forceMerge(1);
                    writer.deleteDocuments(new Term("id", "2162"));
                    writer.close();
                    closed = true;
                } finally {
                    if (!closed) {
                        // Discard uncommitted changes and release the writer.
                        writer.rollback();
                    }
                }
            } finally {
                dir.close();
            }
            System.out.println(" caught b " );
            System.out.println(new Date());
            System.out.println(new Date().getTime());
        } catch (IOException e) {
            System.out.println(" caught a " );
            // Report what actually went wrong instead of discarding it.
            e.printStackTrace();
        }
    }

    /**
     * Reads the UTF-8 file at {@code path} line by line and adds one document
     * per usable line to {@code writer}. Lines with at most two columns are
     * ignored silently; lines with three or four columns, or with non-numeric
     * read/comment counts, are skipped with a message on standard error.
     */
    private static void indexFile(IndexWriter writer, String path) throws IOException {
        // "HH" is the 24-hour clock. With the 12-hour "hh" and no AM/PM marker,
        // an input such as "12:30" is parsed as 00:30.
        SimpleDateFormat df = new SimpleDateFormat("yyyy-MM-dd HH:mm");
        Scanner scanner = new Scanner(new FileInputStream(path), "UTF-8");
        try {
            int lastId = 0;
            while (scanner.hasNextLine()) {
                String line = scanner.nextLine();
                String[] elems = line.split(",");
                // A line made only of commas splits to an empty array.
                if (elems.length > 0) {
                    System.out.println(elems[0]);
                }
                if (elems.length <= 2) {
                    continue;
                }
                if (elems.length < 5) {
                    System.err.println("skipping line with " + elems.length + " columns (need 5): " + line);
                    continue;
                }
                int read;
                int comment;
                try {
                    read = Integer.parseInt(elems[3].trim());
                    comment = Integer.parseInt(elems[4].trim());
                } catch (NumberFormatException ex) {
                    System.err.println("skipping line with non-numeric counts: " + line);
                    continue;
                }
                lastId += 1;
                writer.addDocument(toDocument(lastId, elems, read, comment, df));
            }
            // Scanner swallows read errors; surface them so a truncated read
            // does not silently produce a truncated index.
            IOException readError = scanner.ioException();
            if (readError != null) {
                throw readError;
            }
        } finally {
            scanner.close();
        }
    }

    /**
     * Builds the document for one input line. If the date column cannot be
     * parsed, the parse error message is printed and the current time is
     * stored instead.
     */
    private static Document toDocument(int id, String[] elems, int read, int comment, SimpleDateFormat df) {
        System.out.println(elems[2]);
        Date date = new Date();
        try {
            date = df.parse(elems[2]);
            System.out.println(date.toLocaleString());
        } catch (java.text.ParseException ex) {
            System.out.println(ex.getMessage());
        }
        System.out.println(elems[2]);

        Document doc = new Document();
        NumericField dateField = new NumericField("date", Field.Store.YES, true);
        dateField.setLongValue(date.getTime());
        doc.add(dateField);
        doc.add(new NumericField("read", Field.Store.YES, true).setIntValue(read));
        doc.add(new NumericField("comment", Field.Store.YES, true).setIntValue(comment));
        doc.add(new Field("id", Integer.toString(id), Field.Store.YES, Field.Index.NOT_ANALYZED));
        doc.add(new Field("title", elems[0], Field.Store.YES, Field.Index.ANALYZED));
        doc.add(new Field("link", elems[1], Field.Store.YES, Field.Index.NO));
        return doc;
    }
}
<!-- Maven build descriptor for the com.jd:lucene module, packaged as a jar. -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.jd</groupId>
<artifactId>lucene</artifactId>
<version>1.0-SNAPSHOT</version>
<packaging>jar</packaging>
<name>lucene</name>
<url>http://maven.apache.org</url>
<properties>
<!-- Sources are compiled as UTF-8; the Java sources contain non-ASCII string literals and comments. -->
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>
<dependencies>
<!-- Test-scope only. -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>3.8.1</version>
<scope>test</scope>
</dependency>
<!-- Core Lucene library. The Java sources pass Version.LUCENE_34 to the analyzer, query parser and writer config while building against 3.5.0. -->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-core</artifactId>
<version>3.5.0</version>
<type>jar</type>
</dependency>
<!-- NOTE(review): no import in the visible Search/Index sources comes from this artifact; confirm it is still needed before removing. -->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-demo</artifactId>
<version>3.5.0</version>
</dependency>
</dependencies>
</project>