敏感词工具类
敏感词工具类
东拼西凑弄出来一个敏感词工具类
package com.weichu.util;

import java.io.*;
import java.net.URISyntaxException;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.*;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.springframework.util.ResourceUtils;

/**
 * Sensitive-word filter.
 *
 * @author 卫初
 * @version 1.0
 * @date 2021/7/28 11:28
 */
public class SensitiveWordUtil {

    /**
     * Minimal-match rule: stop at the shortest dictionary word found at a position.
     * Example: dictionary ["ab", "abc"], text "xabc" -> the match is "ab".
     * The unusual capitalisation of the identifier is kept so existing callers still compile.
     */
    public static final int MinMatchTYpe = 1;

    /**
     * Maximal-match rule: keep going and report the longest dictionary word at a position.
     * Example: dictionary ["ab", "abc"], text "xabc" -> the match is "abc".
     */
    public static final int MaxMatchType = 2;

    private static final Logger LOG = Logger.getLogger(SensitiveWordUtil.class.getName());

    /** Class-path name of the dictionary file (one word per line). */
    private static final String DICTIONARY_RESOURCE = "IllegalWordsSearch.txt";

    /** Marker key stored in every trie node; its value says whether a word ends at that node. */
    private static final String END_KEY = "isEnd";
    private static final String END_YES = "1";
    private static final String END_NO = "0";

    /**
     * Shortest match that is reported. The previous implementation discarded every match
     * shorter than two characters, so single-character dictionary entries never matched;
     * that behaviour is preserved here.
     * NOTE(review): the old inline comment claimed "length must be >= 1" - confirm which is intended.
     */
    private static final int MIN_WORD_LENGTH = 2;

    /**
     * Root of the word trie. Keys are {@link Character}s mapping to child nodes (also maps);
     * every child node additionally carries the string key {@code "isEnd"} with value
     * {@code "1"} or {@code "0"}. The field stays public and raw for backward compatibility.
     * A fully built map is assigned in one step, so readers never observe a half-built trie.
     */
    @SuppressWarnings("rawtypes")
    public static volatile HashMap sensitiveWordMap = new HashMap();

    static {
        update();
    }

    /**
     * Reloads the dictionary from the class-path resource and rebuilds the trie.
     *
     * <p>The resource is read through the class loader's URL stream, so it also works when the
     * file is packaged inside a JAR. Lines are decoded as UTF-8, trimmed, and blank lines are
     * skipped. If the resource is missing or unreadable the problem is logged and the dictionary
     * that is currently loaded is left untouched.
     */
    public static void update() {
        URL location = findDictionary();
        if (location == null) {
            LOG.log(Level.WARNING, "Sensitive-word dictionary not found on class path: {0}",
                    DICTIONARY_RESOURCE);
            return;
        }

        Set<String> words = new HashSet<>();
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(location.openStream(), StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                String word = line.trim();
                if (!word.isEmpty()) {
                    words.add(word);
                }
            }
        } catch (IOException e) {
            LOG.log(Level.WARNING, "Failed to read sensitive-word dictionary " + location, e);
            return;
        }
        init(words);
    }

    /**
     * Replaces the active dictionary with the given words (builds the DFA/trie model).
     *
     * @param sensitiveWordSet the words to filter; must not be {@code null}
     * @throws NullPointerException if {@code sensitiveWordSet} is {@code null}
     */
    public static synchronized void init(Set<String> sensitiveWordSet) {
        Objects.requireNonNull(sensitiveWordSet, "sensitiveWordSet");
        initSensitiveWordMap(sensitiveWordSet);
    }

    /**
     * Builds a fresh trie from the given words and publishes it.
     *
     * @param sensitiveWordSet the words to index; {@code null} and empty entries are ignored
     */
    private static void initSensitiveWordMap(Set<String> sensitiveWordSet) {
        // Pre-size the root to limit rehashing while the first level is filled.
        HashMap<Object, Object> root = new HashMap<>(sensitiveWordSet.size());
        for (String word : sensitiveWordSet) {
            if (word == null || word.isEmpty()) {
                continue;
            }
            Map<Object, Object> node = root;
            for (int i = 0; i < word.length(); i++) {
                Character ch = word.charAt(i);
                @SuppressWarnings("unchecked")
                Map<Object, Object> child = (Map<Object, Object>) node.get(ch);
                if (child == null) {
                    child = new HashMap<>();
                    child.put(END_KEY, END_NO);
                    node.put(ch, child);
                }
                node = child;
            }
            // The node reached by the last character marks the end of a word.
            node.put(END_KEY, END_YES);
        }
        // Publish only after the trie is complete.
        sensitiveWordMap = root;
    }

    /**
     * Tells whether the text contains at least one sensitive word.
     *
     * @param txt       the text to inspect; {@code null} is treated as containing nothing
     * @param matchType {@link #MinMatchTYpe} or {@link #MaxMatchType}
     * @return {@code true} if a sensitive word occurs in {@code txt}
     */
    public static boolean contains(String txt, int matchType) {
        if (txt == null) {
            return false;
        }
        for (int i = 0; i < txt.length(); i++) {
            if (checkSensitiveWord(txt, i, matchType) > 0) {
                return true; // one hit is enough, no need to scan the rest
            }
        }
        return false;
    }

    /**
     * Tells whether the text contains at least one sensitive word (maximal-match rule).
     *
     * @param txt the text to inspect; {@code null} is treated as containing nothing
     * @return {@code true} if a sensitive word occurs in {@code txt}
     */
    public static boolean contains(String txt) {
        return contains(txt, MaxMatchType);
    }

    /**
     * Collects the distinct sensitive words found in the text. The scan moves left to right
     * and resumes right after each match, so matches never overlap.
     *
     * @param txt       the text to inspect; {@code null} yields an empty set
     * @param matchType {@link #MinMatchTYpe} or {@link #MaxMatchType}
     * @return a new mutable set with the words found (never {@code null})
     */
    public static Set<String> getSensitiveWord(String txt, int matchType) {
        Set<String> found = new HashSet<>();
        if (txt == null) {
            return found;
        }
        int i = 0;
        while (i < txt.length()) {
            int length = checkSensitiveWord(txt, i, matchType);
            if (length > 0) {
                found.add(txt.substring(i, i + length));
                i += length;
            } else {
                i++;
            }
        }
        return found;
    }

    /**
     * Collects the distinct sensitive words found in the text (maximal-match rule).
     *
     * @param txt the text to inspect; {@code null} yields an empty set
     * @return a new mutable set with the words found (never {@code null})
     */
    public static Set<String> getSensitiveWord(String txt) {
        return getSensitiveWord(txt, MaxMatchType);
    }

    /**
     * Masks every sensitive word character by character.
     * Example: word "abc", mask {@code '*'}, text "1abc2" -> "1***2".
     *
     * @param txt         the text to clean; {@code null} is returned unchanged
     * @param replaceChar the mask character, repeated once per masked character
     * @param matchType   {@link #MinMatchTYpe} or {@link #MaxMatchType}
     * @return the text with all matches masked
     */
    public static String replaceSensitiveWord(String txt, char replaceChar, int matchType) {
        return replaceMatches(txt, matchType, String.valueOf(replaceChar), true);
    }

    /**
     * Masks every sensitive word character by character (maximal-match rule).
     *
     * @param txt         the text to clean; {@code null} is returned unchanged
     * @param replaceChar the mask character, repeated once per masked character
     * @return the text with all matches masked
     */
    public static String replaceSensitiveWord(String txt, char replaceChar) {
        return replaceSensitiveWord(txt, replaceChar, MaxMatchType);
    }

    /**
     * Replaces every sensitive word with a fixed string.
     * Example: word "abc", replacement "[x]", text "1abc2" -> "1[x]2".
     * The replacement is inserted literally; characters such as {@code $} have no special meaning.
     *
     * @param txt        the text to clean; {@code null} is returned unchanged
     * @param replaceStr the string inserted in place of each match
     * @param matchType  {@link #MinMatchTYpe} or {@link #MaxMatchType}
     * @return the text with all matches replaced
     * @throws NullPointerException if {@code replaceStr} is {@code null} and a match is found
     */
    public static String replaceSensitiveWord(String txt, String replaceStr, int matchType) {
        return replaceMatches(txt, matchType, replaceStr, false);
    }

    /**
     * Replaces every sensitive word with a fixed string (maximal-match rule).
     *
     * @param txt        the text to clean; {@code null} is returned unchanged
     * @param replaceStr the string inserted in place of each match
     * @return the text with all matches replaced
     * @throws NullPointerException if {@code replaceStr} is {@code null} and a match is found
     */
    public static String replaceSensitiveWord(String txt, String replaceStr) {
        return replaceSensitiveWord(txt, replaceStr, MaxMatchType);
    }

    /**
     * Single left-to-right pass that rewrites each matched span in place. Working on positions
     * (instead of search-and-replace per word) keeps the result deterministic when dictionary
     * words overlap and avoids any regex interpretation of words or replacements.
     *
     * @param txt            the text to rewrite
     * @param matchType      the matching rule
     * @param replacement    the text to insert
     * @param repeatPerChar  {@code true} to insert {@code replacement} once per matched character,
     *                       {@code false} to insert it once per matched word
     * @return the rewritten text, or {@code txt} itself when nothing matched
     */
    private static String replaceMatches(String txt, int matchType, String replacement,
                                         boolean repeatPerChar) {
        if (txt == null || txt.isEmpty()) {
            return txt;
        }
        StringBuilder out = null; // created lazily: most texts are clean
        int copiedUpTo = 0;
        int i = 0;
        while (i < txt.length()) {
            int length = checkSensitiveWord(txt, i, matchType);
            if (length <= 0) {
                i++;
                continue;
            }
            Objects.requireNonNull(replacement, "replaceStr");
            if (out == null) {
                out = new StringBuilder(txt.length());
            }
            out.append(txt, copiedUpTo, i);
            int copies = repeatPerChar ? length : 1;
            for (int k = 0; k < copies; k++) {
                out.append(replacement);
            }
            i += length;
            copiedUpTo = i;
        }
        if (out == null) {
            return txt;
        }
        out.append(txt, copiedUpTo, txt.length());
        return out.toString();
    }

    /**
     * Walks the trie starting at {@code beginIndex} and returns the length of the dictionary
     * word that starts there.
     *
     * <p>Only positions where a word actually ends are remembered, so walking further into a
     * longer, unfinished branch never inflates the result. Word ends shorter than
     * {@link #MIN_WORD_LENGTH} are ignored and do not stop the walk.
     *
     * @param txt        the text being scanned
     * @param beginIndex index of the first character to test
     * @param matchType  {@link #MinMatchTYpe} stops at the first word end; any other value keeps
     *                   going and returns the longest word
     * @return the length of the matched word, or 0 if none starts at {@code beginIndex}
     */
    private static int checkSensitiveWord(String txt, int beginIndex, int matchType) {
        Map<?, ?> node = sensitiveWordMap;
        if (node == null) {
            return 0;
        }
        int matchedLength = 0;
        for (int i = beginIndex; i < txt.length(); i++) {
            Object next = node.get(txt.charAt(i));
            if (!(next instanceof Map)) {
                break; // no dictionary word continues with this character
            }
            node = (Map<?, ?>) next;
            int walked = i - beginIndex + 1;
            if (walked >= MIN_WORD_LENGTH && END_YES.equals(node.get(END_KEY))) {
                matchedLength = walked;
                if (MinMatchTYpe == matchType) {
                    break;
                }
            }
        }
        return matchedLength;
    }

    /**
     * Appends a word (followed by CRLF, encoded as UTF-8) to the dictionary file.
     * The in-memory dictionary is not refreshed; call {@link #update()} afterwards.
     *
     * <p>This only works while the dictionary is a real file on disk; a resource packaged in a
     * JAR cannot be appended to.
     *
     * @param newStr the word to append; {@code null} is rejected
     * @return {@code true} if the word was written, {@code false} if {@code newStr} is
     *         {@code null} or the write failed
     * @throws IOException if the dictionary cannot be resolved to a file on disk
     */
    public static boolean writeTxtFile(String newStr) throws IOException {
        if (newStr == null) {
            return false; // would otherwise append the literal text "null"
        }
        File dictionary = resolveDictionaryFile();
        byte[] payload = (newStr + "\r\n").getBytes(StandardCharsets.UTF_8);
        try (FileOutputStream out = new FileOutputStream(dictionary, true)) {
            out.write(payload);
            return true;
        } catch (IOException e) {
            LOG.log(Level.WARNING, "Failed to append to sensitive-word dictionary " + dictionary, e);
            return false;
        }
    }

    /**
     * Looks the dictionary up on the class path, preferring the thread context class loader.
     *
     * @return the resource URL, or {@code null} if it does not exist
     */
    private static URL findDictionary() {
        ClassLoader loader = Thread.currentThread().getContextClassLoader();
        if (loader == null) {
            loader = SensitiveWordUtil.class.getClassLoader();
        }
        return loader != null
                ? loader.getResource(DICTIONARY_RESOURCE)
                : ClassLoader.getSystemResource(DICTIONARY_RESOURCE);
    }

    /**
     * Resolves the dictionary resource to a file on disk.
     *
     * @return the dictionary file
     * @throws FileNotFoundException if the resource is missing or does not live in the file system
     */
    private static File resolveDictionaryFile() throws FileNotFoundException {
        String description = "class path resource [" + DICTIONARY_RESOURCE + "]";
        URL location = findDictionary();
        if (location == null) {
            throw new FileNotFoundException(description
                    + " cannot be resolved to absolute file path because it does not exist");
        }
        if (!"file".equals(location.getProtocol())) {
            throw new FileNotFoundException(description
                    + " cannot be resolved to absolute file path because it does not reside"
                    + " in the file system: " + location);
        }
        try {
            return new File(location.toURI().getSchemeSpecificPart());
        } catch (URISyntaxException e) {
            // URL is not a strict URI (e.g. unescaped spaces): fall back to its raw path.
            return new File(location.getFile());
        }
    }
}
本文来自互联网用户投稿,文章观点仅代表作者本人,不代表本站立场,不承担相关法律责任。如若转载,请注明出处。 如若内容造成侵权/违法违规/事实不符,请点击【内容举报】进行投诉反馈!
