写的一款简易的热点词汇记录工具

项目需要对用户提交的附件、文章、搜索框内容等做热词分析。如下图:
写的一款简易的热点词汇记录工具_第1张图片
公司有大数据团队,但本着能不麻烦别人就不麻烦别人的原则,自己写了一款简易的记录工具。原理也很简单:手工在业务代码中插入锚点,用分词器分好词,排除字母、数字、符号和敏感词,再将词汇按年度累加记录到数据库中即可。代码如下:

@Component
public class HotWordHelper {

    private static HotWordMapper hotWordMapper;

    static List<Character> FILTER_CHARS = new ArrayList<>();

    static {
        String number = "123456789abcdefghijklnmopqrstuvwxyzABCDEFGHIJKLNMOPQRSTUVWXYZ";
        char[] chars = number.toCharArray();
        for (char aChar : chars) {
            FILTER_CHARS.add(aChar);
        }
    }

    @Autowired
    public ZYHotWordHelper(HotWordMapper hotWordMapper) {
        ZYHotWordHelper.hotWordMapper = hotWordMapper;
    }

    public static List<HotWord> loaderHotWordTen(String moduleCode) {
        LocalDate now = LocalDate.now();
        int year = now.getYear();
        return loaderHotWord(year, 10, moduleCode);
    }

    public static List<HotWord> loaderHotWord(int top, String moduleCode) {
        LocalDate now = LocalDate.now();
        int year = now.getYear();
        return loaderHotWord(year, top, moduleCode);
    }

    public static List<HotWord> loaderHotWord(int year, int top, String moduleCode) {
        LambdaQueryWrapper<HotWord> wrapper = Wrappers.lambdaQuery();
        wrapper.eq(HotWord::getRecordYear, year);
        wrapper.eq(HotWord::getModuleCode, moduleCode);
        return hotWordMapper.selectTop(wrapper, HotWord::getAppearTimes, top);
    }

	// 直接词汇,如字典之类的。
    public static void putDirectHotWord(String text, String moduleCode) {
    	// 为不影响主业务速度,改成异常
        Runnable runnable = () -> putHotWord(true, text, moduleCode);
        AsyncExecutor.execute(runnable);
    }
	
	// 分析词汇
    public static void putAnalyzeHotWord(String text, String moduleCode) {
    	// 为不影响主业务速度,改成异常
        Runnable runnable = () -> putHotWord(false, text, moduleCode);
        AsyncExecutor.execute(runnable);
    }

	// 附件
    public static void putAttachmentAsync(StringsField attachmentIds, String moduleCode) {
        if (ZYListUtils.isEmptyList(attachmentIds)) {
            return;
        }
        Runnable runnable = () -> doPutAttachmentAsync(attachmentIds, moduleCode);
        AsyncExecutor.execute(runnable);
    }

	// 解析附件
    private static void doPutAttachmentAsync(StringsField attachmentIds, String moduleCode) {
        FileInfoMapper fileInfoMapper = SpringUtils.getBean(FileInfoMapper.class);
        List<FileInfo> fileInfos = fileInfoMapper.selectBatchIds(attachmentIds);
        if (ZYListUtils.isEmptyList(fileInfos)) {
            return;
        }

        FileStoreService storeService= ZYSpringUtils.getBean(FileStoreService.class);
        List<FileWrapper> fileWrappers = ZYListUtils.list2list(fileInfos, FileInfo::toFileWrapper);
        for (FileWrapper fileWrapper : fileWrappers) {
            try (InputStream objectStream = storeService.getObjectStream(fileWrapper)) {
                String text = IOUtils.toString(objectStream, StandardCharsets.UTF_8);
                putAnalyzeHotWord(text, moduleCode);
            } catch (Exception e) {
                return;
            }

        }
    }

    private static void putHotWord(boolean isDirect, String text, String moduleCode) {
        if (ZYStrUtils.isAnyNull(text, moduleCode)) {
            return;
        }

        List<String> words = analyzerWords(isDirect, text);
        if (ZYListUtils.isEmptyList(words)) {
            return;
        }
        //  List smallWordCompare = new ArrayList<>(words);
        words.removeIf(w -> {
            if (!matchLength(w)) {
                return true;
            }
            char[] chars = w.toCharArray();
            for (char aChar : chars) {
                // 不要数字字母
                if (FILTER_CHARS.contains(aChar)) {
                    return true;
                }
            }

            // 存在误判,还是不用这段代码
      /*      for (String compareWord : smallWordCompare) {
                if (!w.equals(compareWord) && compareWord.contains(w)) {
                    return true;
                }
            }
*/
            return false;
        });
        Map<String, Integer> wordCount = ZYMapUtils.countField(words, w -> w);

        LocalDate now = LocalDate.now();
        int year = now.getYear();
        LambdaQueryWrapper<HotWord> wrapper = Wrappers.lambdaQuery();
        wrapper.in(HotWord::getHotWord, words);
        wrapper.eq(HotWord::getRecordYear, year);
        wrapper.eq(HotWord::getModuleCode, moduleCode);
        List<HotWord> existsWords = hotWordMapper.selectList(wrapper);
        Map<String, HotWord> wordIdContainer = ZYListUtils.groupModel(existsWords, HotWord::getHotWord);

        List<HotWord> addHotWords = new ArrayList<>();
        List<HotWord> editHotWords = new ArrayList<>();
        wordCount.forEach((w, times) -> {
            HotWord hotWord = wordIdContainer.get(w);
            if (null != hotWord) {
                Integer appearTimes = hotWord.getAppearTimes();
                appearTimes += times;
                hotWord.setAppearTimes(appearTimes);
                editHotWords.add(hotWord);
            } else {
                HotWord newHotWord = new HotWord();
                newHotWord.setRecordYear(year);
                newHotWord.setAppearTimes(times);
                newHotWord.setHotWord(w);
                newHotWord.setModuleCode(moduleCode);
                addHotWords.add(newHotWord);
            }
        });

        if (ZYListUtils.isNotEmptyList(addHotWords)) {
            hotWordMapper.insertBatch(addHotWords);
        }
        if (ZYListUtils.isNotEmptyList(editHotWords)) {
            for (HotWord editHotWord : editHotWords) {
                hotWordMapper.updateById(editHotWord);
            }
        }
    }

    private static List<String> analyzerWords(boolean isDirect, String text) {
        List<String> words;
        if (isDirect) {
            words = Collections.singletonList(text);
        } else {
            words = ZYDirtyWordHelper.analyze(text);
            if (matchLength(text) && !words.contains(text)) {
                words.add(text);
            }
        }
        return words;
    }
    
    private static boolean matchLength(String text) {
        int length = text.length();
        return length > 1 && length < 6;
    }

}

下面是记录表中的效果图。实际效果还可以,实时性好,记录速度也非常快。
写的一款简易的热点词汇记录工具_第2张图片

你可能感兴趣的:(java)