部门给我找了点事做：帮忙筛选简历，估计是觉得我加班少了。为了不浪费时间，我写了个简单的简历内容打分排序程序，以后直接按得分排序后转发。
代码如下:
package com.lu;

import java.io.IOException;
import java.io.StringReader;
import java.io.UnsupportedEncodingException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Optional;
import java.util.function.Consumer;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.codec.binary.Base64;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

/**
 * Static helpers shared by the index builder and the scorer: tokenizing text,
 * ordering a score map, unwrapping base64-encoded HTML bodies and opening an
 * on-disk Lucene directory.
 */
public class LuceneUtils {

    /**
     * Matches a MIME-style document that carries a base64-encoded HTML part.
     * Group 2 is the declared charset, group 3 is the base64 payload that sits
     * between the {@code Content-Transfer-Encoding:base64} header and the next
     * {@code ----boundary_} marker. Compiled once because it never changes.
     */
    private static final Pattern BASE64_HTML_PART = Pattern.compile(
            "(^[\\s|\\S]*?)Content-Type:text/html;charset=\"([\\s|\\S]*?)\"[\\s|\\S]*?Content-Transfer-Encoding:base64([\\S|\\s]*?)----boundary_([\\S|\\s]*?$)");

    /**
     * Tokenizes a string with the given analyzer.
     *
     * <p>Example:
     * {@code getWords("系统提供HTTP服务给其他系统用于实时数据交互", analyzer).forEach(System.out::println);}
     *
     * @param str      the text to tokenize; {@code null} or empty yields an empty list
     * @param analyzer the analyzer that produces the token stream
     * @return the tokens in stream order; on an I/O error the stack trace is printed and the
     *         tokens collected so far are returned
     */
    public static List<String> getWords(String str, Analyzer analyzer) {
        List<String> result = new ArrayList<>();
        if (str == null || str.isEmpty()) {
            return result;
        }
        // try-with-resources guarantees close() even when incrementToken() fails.
        try (TokenStream stream = analyzer.tokenStream("content", new StringReader(str))) {
            CharTermAttribute attr = stream.addAttribute(CharTermAttribute.class);
            stream.reset();
            while (stream.incrementToken()) {
                result.add(attr.toString());
            }
            // The TokenStream contract requires end() after the last token and before close().
            stream.end();
        } catch (IOException e) {
            e.printStackTrace();
        }
        return result;
    }

    /**
     * Returns a copy of the map whose iteration order follows ascending value.
     *
     * @param scoreMap the scores to order; may be {@code null}
     * @param <K>      the key type
     * @return a new insertion-ordered map sorted by value (lowest first); an empty map when the
     *         input is {@code null} or empty, so callers can iterate without a null check
     */
    public static <K> Map<K, Integer> sortMapByValue(Map<K, Integer> scoreMap) {
        Map<K, Integer> sortedMap = new LinkedHashMap<>();
        if (scoreMap == null || scoreMap.isEmpty()) {
            return sortedMap;
        }
        scoreMap.entrySet().stream()
                .sorted(Entry.<K, Integer>comparingByValue())
                .forEachOrdered(e -> sortedMap.put(e.getKey(), e.getValue()));
        return sortedMap;
    }

    /**
     * Extracts the readable body from a document.
     *
     * <p>When the text contains a base64-encoded {@code text/html} part, that part is decoded
     * with its declared charset and returned. Otherwise the text is returned unchanged.
     *
     * @param content the raw file content; may be {@code null}
     * @return the decoded HTML part or the original content; empty only when {@code content}
     *         is {@code null}
     */
    public static Optional<String> checkGetContent(String content) {
        if (content == null) {
            return Optional.empty();
        }
        Matcher matcher = BASE64_HTML_PART.matcher(content);
        if (matcher.matches()) {
            String matchCharset = matcher.group(2);
            String matchContent = matcher.group(3);
            return Optional.of(decodeStr(matchContent, matchCharset));
        }
        return Optional.of(content);
    }

    /**
     * Base64-decodes a string and interprets the bytes in the given charset.
     *
     * @param encodeStr the base64 text; characters outside the base64 alphabet (such as line
     *                  breaks) are skipped by the decoder
     * @param charset   the charset name declared by the document; surrounding whitespace is
     *                  ignored
     * @return the decoded text; when the charset is {@code null} or not supported by this JVM
     *         the bytes are read as UTF-8 so the result does not depend on the host platform
     */
    public static String decodeStr(String encodeStr, String charset) {
        // Base64 is pure ASCII, so an explicit charset keeps this independent of the platform default.
        byte[] decoded = Base64.decodeBase64(encodeStr.getBytes(StandardCharsets.US_ASCII));
        if (charset != null) {
            try {
                return new String(decoded, charset.trim());
            } catch (UnsupportedEncodingException ignored) {
                // Unknown charset name: fall through to the deterministic default below.
            }
        }
        return new String(decoded, StandardCharsets.UTF_8);
    }

    /**
     * Opens a file-system backed Lucene directory.
     *
     * @param indexPath the path of the index directory; may be {@code null}
     * @return the opened directory (the caller is responsible for closing it), or empty when the
     *         path is {@code null} or the directory cannot be opened (the stack trace is printed)
     */
    public static Optional<Directory> openFSDirectory(String indexPath) {
        if (indexPath == null) {
            return Optional.empty();
        }
        Path path = Paths.get(indexPath);
        try {
            Directory fsDirectory = FSDirectory.open(path);
            return Optional.of(fsDirectory);
        } catch (IOException e) {
            e.printStackTrace();
        }
        return Optional.empty();
    }
}
package com.lu;

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.Optional;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;

/**
 * Scores indexed documents by running a fixed list of keyword queries against the
 * {@code content} field and adding each query's weight to every file it hits.
 */
public class ContentScoror {

    /** A query-parser expression and the points awarded to every document it matches. */
    private static final class WeightedKeyword {
        final String text;
        final int weight;

        WeightedKeyword(String text, int weight) {
            this.text = text;
            this.weight = weight;
        }
    }

    /** Keyword queries in evaluation order; the order only affects the console log. */
    private static final WeightedKeyword[] KEYWORDS = {
        new WeightedKeyword("统招本科", 1),
        new WeightedKeyword("计算机数学信息管理", 1),
        new WeightedKeyword("Java Web App", 1),
        new WeightedKeyword("struts", 1),
        new WeightedKeyword("mybatis", 1),
        new WeightedKeyword("ibatis", 1),
        new WeightedKeyword("hibernate", 1),
        new WeightedKeyword("spring", 1),
        new WeightedKeyword("调优", 2),
        new WeightedKeyword("webservice", 1),
        new WeightedKeyword("axis", 2),
        new WeightedKeyword("xfire", 1),
        new WeightedKeyword("cxf", 1),
        new WeightedKeyword("jax-ws jws", 1),
        new WeightedKeyword("xml json", 1),
        new WeightedKeyword("oracle mysql sqlserver db2", 1),
        new WeightedKeyword("redis memcached", 1),
        new WeightedKeyword("组长管理设计架构分析", 1),
    };

    /**
     * Weight applied to documents containing the exact phrase "培训 机构".
     * NOTE(review): the original literal was lost (the published code reads {@code eval(searcher, pq, *)},
     * which does not compile). A penalty of -1 is assumed here; confirm the intended value.
     */
    private static final int TRAINING_INSTITUTION_WEIGHT = -1;

    /** Maximum number of hits fetched per query. */
    private static final int MAX_HITS = 1000;

    // NOTE(review): backslashes make this path Windows-specific; it must stay identical to the
    // path used by the index builder, so it is left untouched here.
    String indexPath = "lucene\\Index\\";
    Map<String, Integer> scoreMap = new HashMap<>();
    SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer();

    /**
     * Runs one query and adds {@code weight} to the running score of every matching file.
     * Each hit is also logged to standard output.
     *
     * @param searcher the searcher over the document index
     * @param query    the query to run
     * @param weight   the points added per matching document (may be negative)
     * @throws IOException if the index cannot be read
     */
    public void eval(IndexSearcher searcher, Query query, Integer weight) throws IOException {
        TopDocs topDocs = searcher.search(query, MAX_HITS);
        for (ScoreDoc hit : topDocs.scoreDocs) {
            Document hitDoc = searcher.doc(hit.doc);
            String filename = hitDoc.get("name");
            System.out.println("(" + hit.doc + "-" + hit.score + ")" + " name:" + filename);
            // The final score is the sum of the weights of all queries the file matched.
            scoreMap.merge(filename, weight, Integer::sum);
        }
    }

    /**
     * Evaluates every configured keyword query plus the training-institution phrase query
     * against the index and accumulates the results in {@link #scoreMap}.
     *
     * <p>I/O and query-syntax errors are printed and abort the remaining queries; the index
     * reader is closed in every case.
     *
     * @param directory the Lucene directory holding the index
     * @param analyzer  the analyzer used to parse the keyword queries
     */
    public void checkIndexAndScore(Directory directory, Analyzer analyzer) {
        try (IndexReader ir = DirectoryReader.open(directory)) {
            IndexSearcher searcher = new IndexSearcher(ir);
            QueryParser parse = new QueryParser("content", analyzer);
            for (WeightedKeyword keyword : KEYWORDS) {
                eval(searcher, parse.parse(keyword.text), keyword.weight);
            }
            Query pq = new PhraseQuery("content", "培训", "机构");
            eval(searcher, pq, TRAINING_INSTITUTION_WEIGHT);
        } catch (IOException | ParseException e) {
            e.printStackTrace();
        }
    }

    /** Opens the index at {@link #indexPath}, scores its documents and closes the directory. */
    public void doScore() {
        Optional<Directory> dir = LuceneUtils.openFSDirectory(indexPath);
        if (!dir.isPresent()) {
            return;
        }
        try (Directory directory = dir.get()) {
            checkIndexAndScore(directory, analyzer);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Prints every scored file as {@code name---->score} in the order given by the sorter. */
    public void showResult() {
        Map<String, Integer> sorted = LuceneUtils.sortMapByValue(scoreMap);
        if (sorted == null) {
            // The sorter may report "nothing to sort" as null; there is nothing to print then.
            return;
        }
        sorted.forEach((k, v) -> System.out.println(k + "---->" + v));
    }

    public static void main(String[] args) {
        ContentScoror fie = new ContentScoror();
        fie.doScore();
        fie.showResult();
    }
}
package com.lu;

import java.io.File;
import java.io.IOException;
import java.util.Optional;
import java.util.stream.Stream;

import org.apache.commons.io.FileUtils;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;

/**
 * Builds the Lucene index: every regular file in {@link #contentFilePath} becomes one document
 * with a stored {@code name} field (the file name) and a stored, analyzed {@code content} field.
 */
public class FileIndexCreator {

    // NOTE(review): backslashes make this path Windows-specific; it must stay identical to the
    // path used by the scorer, so it is left untouched here.
    String indexPath = "lucene\\Index\\";
    String contentFilePath = "content";
    SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer();

    /**
     * Indexes one file, replacing any document previously indexed under the same file name so
     * that re-running the indexer does not create duplicates.
     *
     * @param iw the open index writer
     * @param f  the file to index
     * @throws IOException if the file cannot be read or the index cannot be written
     */
    public void addDoc(IndexWriter iw, File f) throws IOException {
        // NOTE(review): the file is read with the platform default charset, as in the original.
        String raw = FileUtils.readFileToString(f);
        Optional<String> extracted = LuceneUtils.checkGetContent(raw);
        String content = extracted.orElse(raw);

        Document doc = new Document();
        doc.add(new StringField("name", f.getName(), Field.Store.YES));
        doc.add(new TextField("content", content, Field.Store.YES));
        // "name" is an untokenized StringField, so the term matches the whole file name exactly.
        iw.updateDocument(new Term("name", f.getName()), doc);
    }

    /**
     * Indexes every regular file directly inside {@link #contentFilePath}. A file that fails is
     * reported and skipped so the remaining files are still indexed.
     *
     * @param iw the open index writer
     */
    public void content(IndexWriter iw) {
        File[] listFiles = new File(contentFilePath).listFiles();
        if (listFiles == null) {
            // listFiles() returns null when the path is missing or is not a directory.
            System.err.println("Content directory not found: " + contentFilePath);
            return;
        }
        Stream.of(listFiles)
                .filter(File::isFile)
                .forEach(f -> indexQuietly(iw, f));
    }

    /** Indexes one file and prints, rather than propagates, any I/O failure. */
    private void indexQuietly(IndexWriter iw, File f) {
        try {
            addDoc(iw, f);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Opens the index at {@link #indexPath}, indexes the content directory and commits.
     * The writer and the directory are closed even on failure, so no write lock is left behind.
     */
    public void createIndex() {
        Optional<Directory> dir = LuceneUtils.openFSDirectory(indexPath);
        if (!dir.isPresent()) {
            return;
        }
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
        try (Directory directory = dir.get();
             IndexWriter iw = new IndexWriter(directory, iwc)) {
            content(iw);
            iw.commit();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    public static void main(String[] args) {
        FileIndexCreator fie = new FileIndexCreator();
        fie.createIndex();
    }
}
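<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> <modelVersion>4.0.0</modelVersion> <groupId>l.l.h</groupId> <artifactId>domjj</artifactId> <version>0.0.1-SNAPSHOT</version> <dependencies> <dependency> <groupId>xml-resolver</groupId> <artifactId>xml-resolver</artifactId> <version>1.2</version> </dependency> <dependency> <groupId>pull-parser</groupId> <artifactId>pull-parser</artifactId> <version>2.1.10</version> </dependency> <dependency> <groupId>org.dom4j</groupId> <artifactId>dom4j</artifactId> <version>2.0.0-RC1</version> </dependency> <dependency> <groupId>org.apache.lucene</groupId> <artifactId>lucene-core</artifactId> <version>5.3.1</version> </dependency> <dependency> <groupId>org.apache.lucene</groupId> <artifactId>lucene-analyzers-common</artifactId> <version>5.3.1</version> </dependency> <dependency> <groupId>org.apache.lucene</groupId> <artifactId>lucene-queryparser</artifactId> <version>5.3.1</version> </dependency> <dependency> <groupId>org.apache.lucene</groupId> <artifactId>lucene-highlighter</artifactId> <version>5.3.1</version> </dependency> <dependency> <groupId>org.apache.lucene</groupId> <artifactId>lucene-analyzers-smartcn</artifactId> <version>5.3.1</version> </dependency> <dependency> <groupId>commons-io</groupId> <artifactId>commons-io</artifactId> <version>2.4</version> </dependency> <dependency> <groupId>commons-codec</groupId> <artifactId>commons-codec</artifactId> <version>1.9</version> </dependency> </dependencies> </project>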