简历打分排序

阅读更多

     部门给我找了点事做：帮忙筛选简历，估计是觉得我加班少了。为了不浪费时间，我写了个简单的简历内容打分排序程序，以后直接按排序结果转发。

     代码如下:

     

package com.lu;

import java.io.IOException;
import java.io.StringReader;
import java.io.UnsupportedEncodingException;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Optional;
import java.util.function.Consumer;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.codec.binary.Base64;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class LuceneUtils {
	/**
	 * 获取分词结果
	 * 
	 * @param 输入的字符串
	 * @param 分词器
	 * @return 分词结果
	 */		
	// getWords("系统提供HTTP服务给其他系统用于实时数据交互,采用WebService与总行进行实时数据交互",
	// analyzer).forEach(System.out::println);
	public static List getWords(String str, Analyzer analyzer) {
		List result = new ArrayList();
		TokenStream stream = null;
		try {
			stream = analyzer.tokenStream("content", new StringReader(str));
			CharTermAttribute attr = stream.addAttribute(CharTermAttribute.class);
			stream.reset();
			while (stream.incrementToken()) {
				result.add(attr.toString());
			}
		} catch (IOException e) {
			e.printStackTrace();
		} finally {
			if (stream != null) {
				try {
					stream.close();
				} catch (IOException e) {
					e.printStackTrace();
				}
			}
		}
		return result;
	}

	/**
	 * 使用 Map按value进行排序
	 * 
	 * @param map
	 * @return
	 */
	public static Map sortMapByValue(Map scoreMap) {
		if (scoreMap == null || scoreMap.isEmpty()) {
			return null;
		}
		Map sortedMap = new LinkedHashMap();

		List> entryList = new ArrayList>(scoreMap.entrySet());

		entryList.stream().sorted(new Comparator>() {
			@Override
			public int compare(Entry o1, Entry o2) {
				return Integer.compare(o1.getValue(), o2.getValue());
			}
		}).forEach(new Consumer>() {
			@Override
			public void accept(Entry t) {
				sortedMap.put(t.getKey(), t.getValue());
			}
		});
		return sortedMap;
	}

	public static Optional checkGetContent(String content) {
		String regx = "(^[\\s|\\S]*?)Content-Type:text/html;charset=\"([\\s|\\S]*?)\"[\\s|\\S]*?Content-Transfer-Encoding:base64([\\S|\\s]*?)----boundary_([\\S|\\s]*?$)";
		Pattern compile = Pattern.compile(regx);
		Matcher matcher = compile.matcher(content);
		if (matcher.matches()) {
			if (matcher.groupCount() > 0) {
				String matchCharset = matcher.group(2);
				String matchContent = matcher.group(3);
				return Optional.of(decodeStr(matchContent, matchCharset));
			}
		}
		return Optional.of(content);
	}

	public static String decodeStr(String encodeStr, String charset) {
		byte[] b = encodeStr.getBytes();
		Base64 base64 = new Base64();
		b = base64.decode(b);
		String s;
		try {
			s = new String(b, charset);
			return s;
		} catch (UnsupportedEncodingException e) {
			s = new String(b);
			return s;
		}
	}

	public static Optional openFSDirectory(String indexPath) {
		Path path = Paths.get(indexPath);
		try {
			FSDirectory fsDirectory = FSDirectory.open(path);
			return Optional.of(fsDirectory);
		} catch (IOException e) {
			e.printStackTrace();
		}
		return Optional.empty();
	}

}

 

package com.lu;

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.Optional;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;

public class ContentScoror {
	String indexPath = "lucene\\Index\\";

	Map scoreMap = new HashMap<>();
	SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer();

	public void eval(IndexSearcher searcher, Query query, Integer weight) throws IOException {
		TopDocs topDocs = searcher.search(query, 1000);
		ScoreDoc[] hits = topDocs.scoreDocs;
		for (int i = 0; i < hits.length; i++) {
			ScoreDoc hit = hits[i];
			Document hitDoc = searcher.doc(hit.doc);
			System.out.println("(" + hit.doc + "-" + hit.score + ")" + " name:" + hitDoc.get("name"));
			String filename = hitDoc.get("name");
			Integer score = scoreMap.get(filename);
			// 结果按照得分来排序。主要由 关键字的个数和权值来决定
			if (null == score) {
				score = 0;
				scoreMap.put(filename, 0);
			}
			scoreMap.put(filename, score + weight);
		}
	}

	public void checkIndexAndScore(Directory directory, Analyzer analyzer) {
		try {
			IndexReader ir = DirectoryReader.open(directory);
			IndexSearcher searcher = new IndexSearcher(ir);
			QueryParser parse = new QueryParser("content", analyzer);
 
			Query query = parse.parse("统招本科");
			eval(searcher, query, 1);

			query = parse.parse("计算机数学信息管理");
			eval(searcher, query, 1);

			query = parse.parse("Java Web App");
			eval(searcher, query, 1);

			query = parse.parse("struts");
			eval(searcher, query, 1);

			query = parse.parse("mybatis");
			eval(searcher, query, 1);

			query = parse.parse("ibatis");
			eval(searcher, query, 1);

			query = parse.parse("hibernate");
			eval(searcher, query, 1);

			query = parse.parse("spring");
			eval(searcher, query, 1);

			query = parse.parse("调优");
			eval(searcher, query, 2);

			query = parse.parse("webservice");
			eval(searcher, query, 1);

			query = parse.parse("axis");
			eval(searcher, query, 2);

			query = parse.parse("xfire");
			eval(searcher, query, 1);

			query = parse.parse("cxf");
			eval(searcher, query, 1);

			query = parse.parse("jax-ws jws");
			eval(searcher, query, 1);

			query = parse.parse("xml json");
			eval(searcher, query, 1);

			query = parse.parse("oracle mysql sqlserver db2");
			eval(searcher, query, 1);

			query = parse.parse("redis memcached");
			eval(searcher, query, 1);

			query = parse.parse("组长管理设计架构分析");
			eval(searcher, query, 1);

			Query pq = new PhraseQuery("content", "培训", "机构");
			eval(searcher, pq, *);
			ir.close();
		} catch (IOException e) {
			e.printStackTrace();
		} catch (ParseException e) {
			e.printStackTrace();
		}
	}

	public void doScore() {
		Optional dir = LuceneUtils.openFSDirectory(indexPath);
		if (dir.isPresent()) {
			checkIndexAndScore(dir.get(), analyzer);
		}
	}

	public void showResult() {
		LuceneUtils.sortMapByValue(scoreMap).forEach((k, v) -> System.out.println(k + "---->" + v));
	}

	public static void main(String[] args) {
		ContentScoror fie = new ContentScoror();
		fie.doScore();
		fie.showResult();
	}

}

 

 

package com.lu;

import java.io.File;
import java.io.IOException;
import java.util.Optional;
import java.util.stream.Stream;

import org.apache.commons.io.FileUtils;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;

/**
 * Builds the Lucene index: every regular file in the content directory
 * becomes one document with a stored "name" field (the file name) and a
 * stored, analyzed "content" field (the file's text).
 */
public class FileIndexCreator {
	// Location of the index to write.
	String indexPath = "lucene\\Index\\";
	// Directory holding the files to index.
	String contentFilePath = "content";

	SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer();

	/**
	 * Reads one file and adds it to the index as a document.
	 *
	 * @param iw the writer to add the document to
	 * @param f  the file to index
	 * @throws IOException if the file cannot be read or the document cannot
	 *                     be added
	 */
	public void addDoc(IndexWriter iw, File f) throws IOException {
		String str = FileUtils.readFileToString(f);
		// Prefer the extracted body; fall back to the raw text when none is returned.
		Optional<String> extracted = LuceneUtils.checkGetContent(str);
		Document doc = new Document();
		doc.add(new StringField("name", f.getName(), Field.Store.YES));
		doc.add(new TextField("content", extracted.orElse(str), Field.Store.YES));
		iw.addDocument(doc);
	}

	/**
	 * Adds every regular file directly inside {@link #contentFilePath} to the
	 * index. A file that fails is reported and skipped so the remaining files
	 * are still indexed.
	 *
	 * @param iw the writer to add the documents to
	 */
	public void content(IndexWriter iw) {
		File[] listFiles = new File(contentFilePath).listFiles();
		if (listFiles == null) {
			// listFiles() returns null when the path is missing or not a directory.
			System.err.println("Content directory not readable: " + contentFilePath);
			return;
		}
		for (File f : listFiles) {
			if (!f.isFile()) {
				// Sub-directories cannot be read as text; skip them.
				continue;
			}
			try {
				addDoc(iw, f);
			} catch (IOException e) {
				e.printStackTrace();
			}
		}
	}

	/**
	 * Opens the index at {@link #indexPath}, indexes the content directory and
	 * commits. Does nothing when the directory cannot be opened. The index
	 * could also be kept in memory (e.g. a RAMDirectory) instead of on disk.
	 */
	public void createIndex() {
		Optional<Directory> dir = LuceneUtils.openFSDirectory(indexPath);
		if (!dir.isPresent()) {
			return;
		}
		IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
		// try-with-resources closes the writer first, then the directory,
		// even when indexing or committing fails.
		try (Directory directory = dir.get(); IndexWriter iw = new IndexWriter(directory, iwc)) {
			content(iw);
			iw.commit();
		} catch (IOException e) {
			e.printStackTrace();
		}
	}

	public static void main(String[] args) {
		FileIndexCreator fie = new FileIndexCreator();
		fie.createIndex();
	}

}

 


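<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
	<modelVersion>4.0.0</modelVersion>
	<groupId>l.l.h</groupId>
	<artifactId>domjj</artifactId>
	<version>0.0.1-SNAPSHOT</version>

	<dependencies>
		<dependency>
			<groupId>xml-resolver</groupId>
			<artifactId>xml-resolver</artifactId>
			<version>1.2</version>
		</dependency>
		<dependency>
			<groupId>pull-parser</groupId>
			<artifactId>pull-parser</artifactId>
			<version>2.1.10</version>
		</dependency>

		<dependency>
			<groupId>org.dom4j</groupId>
			<artifactId>dom4j</artifactId>
			<version>2.0.0-RC1</version>
		</dependency>

		<dependency>
			<groupId>org.apache.lucene</groupId>
			<artifactId>lucene-core</artifactId>
			<version>5.3.1</version>
		</dependency>
		<dependency>
			<groupId>org.apache.lucene</groupId>
			<artifactId>lucene-analyzers-common</artifactId>
			<version>5.3.1</version>
		</dependency>
		<dependency>
			<groupId>org.apache.lucene</groupId>
			<artifactId>lucene-queryparser</artifactId>
			<version>5.3.1</version>
		</dependency>

		<dependency>
			<groupId>org.apache.lucene</groupId>
			<artifactId>lucene-highlighter</artifactId>
			<version>5.3.1</version>
		</dependency>

		<dependency>
			<groupId>org.apache.lucene</groupId>
			<artifactId>lucene-analyzers-smartcn</artifactId>
			<version>5.3.1</version>
		</dependency>

		<dependency>
			<groupId>commons-io</groupId>
			<artifactId>commons-io</artifactId>
			<version>2.4</version>
		</dependency>

		<dependency>
			<groupId>commons-codec</groupId>
			<artifactId>commons-codec</artifactId>
			<version>1.9</version>
		</dependency>

	</dependencies>
</project>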

 

你可能感兴趣的:(Java,Lucene,Regex)