版本说明:

| ES版本 | IK版本 | Mysql版本 |
| --- | --- | --- |
| v7.6 | v7.6.1 | 5.7 |
IK分词器源码下载:https://github.com/medcl/elasticsearch-analysis-ik
修改源码思路:在初始化词典的时候,新起一个线程,定时去加载Mysql中的新词,即在TODO的地方加。
我个人打开IK源码后,打开pom.xml文件,发现ES的版本并不是7.6.1,修改elasticsearch版本号为7.6.1。如果没有遇到可忽略。
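然后在pom.xml中添加MySQL驱动依赖:
<dependency>
    <groupId>mysql</groupId>
    <artifactId>mysql-connector-java</artifactId>
    <version>5.1.38</version>
</dependency>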
将Dictionary类的getDictRoot方法改为public
/**
 * Returns the absolute form of {@code conf_dir} rendered as a string.
 *
 * @return the absolute path of {@code conf_dir}
 */
public String getDictRoot() {
    return String.valueOf(conf_dir.toAbsolutePath());
}
根据Dictionary类中的addWords方法增加一个addStopWords方法:
/**
 * Adds a batch of new words to the main in-memory dictionary.
 *
 * @param words
 *            words to add; a {@code null} collection is ignored, and
 *            {@code null} or blank elements are skipped
 */
public void addWords(Collection<String> words) {
    if (words == null) {
        return;
    }
    for (String word : words) {
        if (word == null) {
            continue;
        }
        String trimmed = word.trim();
        // A blank entry carries no term, so it is skipped instead of being
        // handed to fillSegment as an empty char array.
        if (trimmed.isEmpty()) {
            continue;
        }
        singleton._MainDict.fillSegment(trimmed.toCharArray());
    }
}
/**
 * Adds a batch of new stop words to the in-memory stop-word dictionary.
 *
 * @param words
 *            stop words to add; a {@code null} collection is ignored, and
 *            {@code null} or blank elements are skipped
 */
public void addStopWords(Collection<String> words) {
    if (words == null) {
        return;
    }
    for (String word : words) {
        if (word == null) {
            continue;
        }
        String trimmed = word.trim();
        // A blank entry carries no term, so it is skipped instead of being
        // handed to fillSegment as an empty char array.
        if (trimmed.isEmpty()) {
            continue;
        }
        singleton._StopWords.fillSegment(trimmed.toCharArray());
    }
}
config目录下新建配置文件jdbc.properties:
# 数据库地址
jdbc.url=jdbc:mysql://127.0.0.1:3306/test?.....
# 数据库用户名
jdbc.user=root
# 数据库密码
jdbc.password=123456
在Dictionary类的同级包下创建一个ExtDictLoader类,代码如下:
package org.wltea.analyzer.dic;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.core.PathUtils;
import org.wltea.analyzer.help.ESPluginLoggerFactory;
import java.io.FileInputStream;
import java.io.IOException;
import java.nio.file.Path;
import java.sql.*;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashSet;
import java.util.Properties;
import java.util.Set;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicReference;
/**
 * Loads extension words and stop words from MySQL into the IK {@link Dictionary}.
 *
 * <p>Connection settings ({@code jdbc.url}, {@code jdbc.user}, {@code jdbc.password})
 * are read once, at construction time, from {@code jdbc.properties} located in the
 * directory returned by {@code Dictionary.getSingleton().getDictRoot()}.
 *
 * <p>Each load method reads the whole table until one run succeeds; after that it only
 * reads rows whose {@code created_time} is at or after the start time of the previous
 * successful run. The start time is recorded only when a run succeeds, so a failed run
 * is retried over the same window on the next call.
 */
public class ExtDictLoader {
    private static final Logger LOGGER = ESPluginLoggerFactory.getLogger(ExtDictLoader.class.getName());
    private static final ExtDictLoader INSTANCE = new ExtDictLoader();

    /** Format of the timestamp literal compared against the {@code created_time} column. */
    private static final String TIMESTAMP_PATTERN = "yyyy-MM-dd HH:mm:ss";

    private static final String EXT_WORD_FULL_SQL = "SELECT word FROM extension_word";
    private static final String EXT_WORD_INCREMENTAL_SQL =
            "SELECT word FROM extension_word WHERE created_time >= ?";
    private static final String STOP_WORD_FULL_SQL = "SELECT word FROM stop_word";
    private static final String STOP_WORD_INCREMENTAL_SQL =
            "SELECT word FROM stop_word WHERE created_time >= ?";

    private final String url;
    private final String username;
    private final String password;

    /** Start time of the last successful extension-word load; {@code null} until one succeeds. */
    private final AtomicReference<String> extWordLastLoadTimeRef = new AtomicReference<>(null);
    /** Start time of the last successful stop-word load; {@code null} until one succeeds. */
    private final AtomicReference<String> stopWordLastLoadTimeRef = new AtomicReference<>(null);

    /**
     * Reads the JDBC settings from {@code jdbc.properties}.
     *
     * @throws IllegalStateException if the properties file cannot be read
     */
    private ExtDictLoader() {
        Properties mysqlConfig = new Properties();
        Path configPath = PathUtils.get(Dictionary.getSingleton().getDictRoot(), "jdbc.properties");
        // try-with-resources so the file handle is released even when load() fails.
        try (FileInputStream in = new FileInputStream(configPath.toFile())) {
            mysqlConfig.load(in);
        } catch (IOException e) {
            // Keep the cause so the underlying I/O failure is visible in the logs.
            throw new IllegalStateException("加载jdbc.properties配置⽂件发⽣异常", e);
        }
        this.url = mysqlConfig.getProperty("jdbc.url");
        this.username = mysqlConfig.getProperty("jdbc.user");
        this.password = mysqlConfig.getProperty("jdbc.password");
        // No explicit Class.forName(...) call here: the code relies on the JDBC driver
        // being discoverable by DriverManager on its own.
    }

    /**
     * Returns the shared loader instance.
     *
     * @return the singleton {@code ExtDictLoader}
     */
    public static ExtDictLoader getInstance() {
        return INSTANCE;
    }

    /**
     * Loads extension words from the {@code extension_word} table and passes them to
     * {@code Dictionary.getSingleton().addWords(...)}. Any failure is logged, not thrown.
     */
    public void loadExtensionWords() {
        String since = extWordLastLoadTimeRef.get();
        // Captured before the query so rows inserted while it runs are seen next time.
        String startedAt = currentTimestamp();
        String sql = (since == null) ? EXT_WORD_FULL_SQL : EXT_WORD_INCREMENTAL_SQL;
        try {
            Set<String> extensionWords = queryWords(sql, since, "extWord");
            Dictionary.getSingleton().addWords(extensionWords);
            // Advance the watermark only after the words were actually applied.
            extWordLastLoadTimeRef.set(startedAt);
        } catch (Exception e) {
            LOGGER.error("从mysql加载extWord发⽣异常", e);
        }
    }

    /**
     * Loads stop words from the {@code stop_word} table and passes them to
     * {@code Dictionary.getSingleton().addStopWords(...)}. Any failure is logged, not thrown.
     */
    public void loadMysqlStopWords() {
        String since = stopWordLastLoadTimeRef.get();
        // Captured before the query so rows inserted while it runs are seen next time.
        String startedAt = currentTimestamp();
        String sql = (since == null) ? STOP_WORD_FULL_SQL : STOP_WORD_INCREMENTAL_SQL;
        try {
            Set<String> stopWords = queryWords(sql, since, "stopWord");
            Dictionary.getSingleton().addStopWords(stopWords);
            // Advance the watermark only after the words were actually applied.
            stopWordLastLoadTimeRef.set(startedAt);
        } catch (Exception e) {
            LOGGER.error("从mysql加载stopWord发⽣异常", e);
        }
    }

    /** Formats the current JVM time using {@link #TIMESTAMP_PATTERN}. */
    private static String currentTimestamp() {
        // SimpleDateFormat is not thread-safe, so a fresh instance is created per call.
        return new SimpleDateFormat(TIMESTAMP_PATTERN).format(new Date());
    }

    /**
     * Runs {@code sql} and collects the non-null values of its {@code word} column.
     *
     * @param sql   query to run; must contain exactly one {@code ?} when {@code since} is non-null
     * @param since lower bound bound to the {@code ?} placeholder, or {@code null} for a full load
     * @param label short name of the word kind, used only in log messages
     * @return the distinct words returned by the query
     * @throws SQLException if connecting or querying fails
     */
    private Set<String> queryWords(String sql, String since, String label) throws SQLException {
        Set<String> words = new HashSet<>();
        try (Connection connection = DriverManager.getConnection(url, username, password);
             PreparedStatement statement = connection.prepareStatement(sql)) {
            if (since != null) {
                // Bound as a parameter instead of being concatenated into the SQL text.
                statement.setString(1, since);
            }
            try (ResultSet resultSet = statement.executeQuery()) {
                LOGGER.info("从mysql加载{}, sql={}, since={}", label, sql, since);
                while (resultSet.next()) {
                    String word = resultSet.getString("word");
                    if (word != null) {
                        words.add(word);
                        LOGGER.info("从mysql加载{},word={}", label, word);
                    }
                }
            }
        }
        return words;
    }
}
数据表结构:
# Extension words. created_time is indexed because the loader polls it with
# "WHERE created_time >= ?", and NOT NULL so every row can match that filter.
CREATE TABLE `extension_word` (
  `id` int(11) NOT NULL AUTO_INCREMENT,
  `word` varchar(64) NOT NULL,
  `created_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP,
  PRIMARY KEY (`id`),
  KEY `idx_created_time` (`created_time`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
# Stop words. Same layout and indexing as extension_word.
CREATE TABLE `stop_word` (
  `id` int(11) NOT NULL AUTO_INCREMENT,
  `word` varchar(64) NOT NULL,
  `created_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP,
  PRIMARY KEY (`id`),
  KEY `idx_created_time` (`created_time`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
找到Dictionary类的initial方法,在我们一开始的TODO的地方增加代码:
new Thread(() -> {
while (true) {
try {
ExtDictLoader extDictLoader = ExtDictLoader.getInstance();
extDictLoader.loadExtensionWords();
extDictLoader.loadMysqlStopWords();
TimeUnit.SECONDS.sleep(60);
} catch (Exception e) {
e.printStackTrace();
}
}
}).start();
完成了上述步骤之后,我们就可以开始打包插件了。直接使用
mvn clean package
命令进行打包,找到target/releases/elasticsearch-analysis-ik-7.6.1.zip文件。在ES安装目录下的plugins目录中新建一个ik文件夹,并将该zip文件解压到ik文件夹下(原文此处的目录结构截图已缺失)。最后重启ES;如果是集群环境,别忘了在其他每个节点上也执行同样的插件安装步骤。
完成上述步骤后,我们就可以启动ES了,在启动过程中,可以看到关于IK热更新MySql词库相关的日志输出;在实际过程中,可能会报很多的异常,下面是我所遇到的一些问题以及解决方案;
(1)Could not create connection to database server
此异常通常是因为引用的mysql驱动版本和mysql服务器版本不一致导致的,只需要把驱动替换成对应的版本即可解决。另外,数据库驱动不需要再额外配置显式加载,即不需要写 Class.forName(props.getProperty("jdbc.className"));
(2)Caused by: java.security.AccessControlException: access denied ("java.lang.RuntimePermission" "setContextClassLoader")
解决办法:编辑ES自带JDK的java.policy文件,添加对应权限:
vim /usr/local/es/elasticsearch-7.6.1/jdk/conf/security/java.policy
# 添加如下配置
permission java.lang.RuntimePermission "setContextClassLoader";
(3)AccessControlException: access denied ("java.net.SocketPermission" "127.0.0.1:3306" "connect,resolve")
出现这个异常在(2)中的java.policy文件追加如下配置即可
permission java.net.SocketPermission "*", "connect,resolve";