Kotlin 定时读取数据库并将内容加载进 Jieba 词库

jieba 的 Java 版本把 add_word 丧心病狂地用 private 修饰了,为了这个专门写个反射没啥意思,咱就用加载自定义词典的那个方法吧!

/**
 * Jieba-based word segmentation with a periodically reloaded user dictionary.
 *
 * When the object is initialised it starts a [Timer] that immediately, and then once per
 * hour, fetches the keyword list from `baiduTopKeyService`, writes it to the dictionary
 * file found on the classpath and loads that file into Jieba's [WordDictionary].
 * The segmentation entry points never trigger a reload themselves.
 *
 * @author scz
 * @date 2019/12/21
 */
object JieBaParticiple {

    private val dictPath = "jiebawords/dict.txt"
    private val baiduTopKeyService: IBaiduTopKeyService = ServerApiFactory.INSTANCE.getApiService("baiduTopKeyService")

    init {
        val dictReloadPeriodHours = 1L
        // First run happens right away (delay 0), then every dictReloadPeriodHours.
        // NOTE(review): Timer(name) creates a non-daemon thread, so this timer keeps the JVM alive.
        Timer("testTimer").schedule(object : TimerTask() {
            override fun run() {
                reloadJiebaDict()
            }
        }, 0, TimeUnit.HOURS.toMillis(dictReloadPeriodHours))
    }

    /**
     * Segments [s] and returns its distinct tokens, most frequent first,
     * using the default limit of [keywords].
     *
     * @param s text to segment
     * @return distinct upper-cased tokens ordered by descending frequency
     * @author scz
     * @date 2019/12/20 19:37
     */
    fun participle(s: String): List<String> = keywords(s)

    /**
     * Segments [s] and returns at most [limit] distinct tokens, most frequent first.
     *
     * @param s text to segment
     * @param limit maximum number of tokens to return
     * @return distinct upper-cased tokens ordered by descending frequency
     * @author scz
     * @date 2019/12/20 19:37
     */
    fun participle(s: String, limit: Int): List<String> = keywords(s, limit)

    /**
     * Refreshes the user dictionary: fetches the keyword list, writes it to the dictionary
     * file and loads that file into Jieba. Any failure is reported by mail instead of being
     * rethrown, so an exception here does not escape into the timer thread.
     *
     * @author scz
     * @date 2019/12/21 11:01
     */
    private fun reloadJiebaDict() {
        try {
            val topKeys = baiduTopKeyService.baiduTopKey
            val resource = requireNotNull(javaClass.classLoader.getResource(dictPath)) {
                "dictionary resource not found on classpath: $dictPath"
            }
            // Resolve through the URI rather than URL.getPath(): getPath() is percent-encoded,
            // so directories containing spaces or non-ASCII characters would not resolve.
            val dictFile = Paths.get(resource.toURI()).toAbsolutePath()
            // Write the keywords to the dictionary file, appending weight 3 and tag "nl" to each.
            IOUtil.writeListTotxt(topKeys, dictFile.toString(), " 3 nl")
            // Load the freshly written custom dictionary into Jieba.
            WordDictionary.getInstance().loadUserDict(dictFile)
            //MailUtil.send("更新分词词库", "success", MailUtil.DEV_BENNYTIAN, MailUtil.DEV_SONGCHENGZHI)
        } catch (e: Exception) {
            MailUtil.send("更新分词词库ERROR", e, MailUtil.DEV_BENNYTIAN, MailUtil.DEV_SONGCHENGZHI)
        }
    }

    /**
     * Splits [s] into tokens with Jieba in INDEX mode, drops tokens equal to `","` and
     * upper-cases the rest. Returns an empty list when [s] is empty according to
     * `DataUtil.isEmpty` or when segmentation throws (the stack trace is printed).
     *
     * @param s text to segment
     * @return upper-cased tokens in the order Jieba produced them
     * @author scz
     * @date 2019/12/21 11:48
     */
    private fun seg(s: String = ""): List<String> {
        if (DataUtil.isEmpty(s)) {
            return emptyList()
        }
        return try {
            JiebaSegmenter().process(s, JiebaSegmenter.SegMode.INDEX)
                    .filter { token -> "," != token.word }
                    // Locale.ROOT keeps the result independent of the JVM default locale.
                    .map { token -> token.word.toUpperCase(java.util.Locale.ROOT) }
        } catch (e: Exception) {
            // Best effort: a segmentation failure yields no tokens rather than an error.
            e.printStackTrace()
            emptyList()
        }
    }

    /**
     * Counts how often each token of [s] occurs and returns the distinct tokens ordered by
     * descending count. Tokens with equal counts keep the order of their first appearance.
     *
     * @param s text to segment
     * @param limit maximum number of tokens to return; a negative value makes `take` throw
     * @return up to [limit] distinct tokens, most frequent first
     * @author scz
     * @date 2019/12/21 10:56
     */
    private fun keywords(s: String, limit: Int = 100): List<String> =
            seg(s).groupingBy { it }
                    .eachCount()
                    .entries
                    .sortedByDescending { it.value }
                    .take(limit)
                    .map { it.key }

}

词库加载与分词调用相互分离:词库定时加载,不影响分词调用!写个轮子不容易啊!

你可能感兴趣的:(kotlin)