diff --git a/CHANGELOG.md b/CHANGELOG.md
index 6ce31bdb6..6686d2456 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -23,6 +23,7 @@
* 【core 】 修复ChineseDate农历获取正月出现数组越界BUG(issue#2112@Github)
* 【extra 】 修复EmojiUtil.toHtmlHex()方法(pr#519@Gitee)
* 【system 】 修复CpuInfo.getUsed()方法(issue#2116@Github)
+* 【dfa 】 修复密集匹配和贪婪匹配冲突问题(issue#2126@Github)
-------------------------------------------------------------------------------------------------------------
# 5.7.20 (2022-01-20)
diff --git a/hutool-dfa/pom.xml b/hutool-dfa/pom.xml
index 6416c6f74..3e05431b2 100644
--- a/hutool-dfa/pom.xml
+++ b/hutool-dfa/pom.xml
@@ -17,11 +17,6 @@
Hutool 基于DFA的关键词查找
-
- cn.hutool
- hutool-core
- ${project.parent.version}
-
cn.hutool
hutool-json
diff --git a/hutool-dfa/src/main/java/cn/hutool/dfa/SensitiveUtil.java b/hutool-dfa/src/main/java/cn/hutool/dfa/SensitiveUtil.java
index 0c2584690..bcf81a784 100644
--- a/hutool-dfa/src/main/java/cn/hutool/dfa/SensitiveUtil.java
+++ b/hutool-dfa/src/main/java/cn/hutool/dfa/SensitiveUtil.java
@@ -195,11 +195,21 @@ public final class SensitiveUtil {
*/
public static T sensitiveFilter(T bean, boolean isGreedMatch, SensitiveProcessor sensitiveProcessor) {
String jsonText = JSONUtil.toJsonStr(bean);
- @SuppressWarnings("unchecked")
- final Class c = (Class) bean.getClass();
+ @SuppressWarnings("unchecked") final Class c = (Class) bean.getClass();
return JSONUtil.toBean(sensitiveFilter(jsonText, isGreedMatch, sensitiveProcessor), c);
}
+ /**
+ * Filters the sensitive words in the given text; by default each matched word is replaced with {@code *}.
+ *
+ * @param text the text to filter
+ * @return the text after sensitive-word filtering
+ * @since 5.7.21
+ */
+ public static String sensitiveFilter(String text) {
+ return sensitiveFilter(text, true, null); // greedy matching; a null processor selects the default SensitiveProcessor
+ }
+
/**
* 处理过滤文本中的敏感词,默认替换成*
*
@@ -214,13 +224,14 @@ public final class SensitiveUtil {
}
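- //敏感词过滤场景下,不需要密集匹配
+ // Dense matching is enabled so that nested words sharing a start index are all found; foundWordMap (keyed by start index) then keeps the last one found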
- List foundWordList = getFoundAllSensitive(text, false, isGreedMatch);
+ List foundWordList = getFoundAllSensitive(text, true, isGreedMatch);
if (CollUtil.isEmpty(foundWordList)) {
return text;
}
sensitiveProcessor = sensitiveProcessor == null ? new SensitiveProcessor() {
} : sensitiveProcessor;
- Map foundWordMap = new HashMap<>(foundWordList.size());
+
+ final Map foundWordMap = new HashMap<>(foundWordList.size(), 1);
foundWordList.forEach(foundWord -> foundWordMap.put(foundWord.getStartIndex(), foundWord));
int length = text.length();
StringBuilder textStringBuilder = new StringBuilder();
diff --git a/hutool-dfa/src/main/java/cn/hutool/dfa/WordTree.java b/hutool-dfa/src/main/java/cn/hutool/dfa/WordTree.java
index 310f5958b..a58371707 100644
--- a/hutool-dfa/src/main/java/cn/hutool/dfa/WordTree.java
+++ b/hutool-dfa/src/main/java/cn/hutool/dfa/WordTree.java
@@ -3,7 +3,6 @@ package cn.hutool.dfa;
import cn.hutool.core.collection.CollUtil;
import cn.hutool.core.collection.CollectionUtil;
import cn.hutool.core.lang.Filter;
-import cn.hutool.core.text.StrBuilder;
import cn.hutool.core.util.StrUtil;
import java.util.ArrayList;
@@ -247,15 +246,15 @@ public class WordTree extends HashMap {
List foundWords = new ArrayList<>();
WordTree current = this;
- int length = text.length();
+ final int length = text.length();
final Filter charFilter = this.charFilter;
//存放查找到的字符缓存。完整出现一个词时加到findedWords中,否则清空
- final StrBuilder wordBuffer = StrUtil.strBuilder();
- final StrBuilder keyBuffer = StrUtil.strBuilder();
+ final StringBuilder wordBuffer = StrUtil.builder();
+ final StringBuilder keyBuffer = StrUtil.builder();
char currentChar;
for (int i = 0; i < length; i++) {
- wordBuffer.reset();
- keyBuffer.reset();
+ wordBuffer.setLength(0);
+ keyBuffer.setLength(0);
for (int j = i; j < length; j++) {
currentChar = text.charAt(j);
// Console.log("i: {}, j: {}, currentChar: {}", i, j, currentChar);
@@ -284,6 +283,7 @@ public class WordTree extends HashMap {
if (false == isDensityMatch) {
//如果非密度匹配,跳过匹配到的词
i = j;
+ break;
}
if (false == isGreedMatch) {
//如果懒惰匹配(非贪婪匹配)。当遇到第一个结尾标记就结束本轮匹配
diff --git a/hutool-dfa/src/test/java/cn/hutool/dfa/DfaTest.java b/hutool-dfa/src/test/java/cn/hutool/dfa/DfaTest.java
index d466f69b7..f185fac31 100644
--- a/hutool-dfa/src/test/java/cn/hutool/dfa/DfaTest.java
+++ b/hutool-dfa/src/test/java/cn/hutool/dfa/DfaTest.java
@@ -47,7 +47,7 @@ public class DfaTest {
}
/**
- * 贪婪匹配原则测试
+ * 贪婪非密集匹配原则测试
*/
@Test
public void greedMatchTest() {
@@ -56,15 +56,15 @@ public class DfaTest {
// -----------------------------------------------------------------------------------------------------------------------------------
// 情况三:匹配到最长关键词,跳过已经匹配的关键词
- // 匹配到【大】,由于到最长匹配,因此【大土豆】接着被匹配
- // 由于【大土豆】被匹配,【土豆】被跳过,由于【刚出锅】被匹配,【出锅】被跳过
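+ // After 【大】 is matched, non-dense matching ends this round and resumes from the next character, so 【大土豆】 is not matched and 【土豆】 is matched next
+ // After 【刚出锅】 is matched, non-dense matching skips 【出锅】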
List matchAll = tree.matchAll(text, -1, false, true);
- Assert.assertEquals(matchAll, CollUtil.newArrayList("大", "大土^豆", "刚出锅"));
+ Assert.assertEquals(matchAll, CollUtil.newArrayList("大", "土^豆", "刚出锅"));
}
/**
- * 密集匹配原则(最短匹配)和贪婪匹配原则测试
+ * 密集匹配原则(最长匹配)和贪婪匹配原则测试
*/
@Test
public void densityAndGreedMatchTest() {
@@ -80,6 +80,29 @@ public class DfaTest {
}
+ @Test
+ public void densityAndGreedMatchTest2(){ // dense + greedy matching must report every dictionary word, including nested prefixes
+ WordTree tree = new WordTree();
+ tree.addWord("赵");
+ tree.addWord("赵阿");
+ tree.addWord("赵阿三");
+
+ final List result = tree.matchAllWords("赵阿三在做什么", -1, true, true); // both match flags enabled; -1 is presumably "no limit" (confirm against matchAllWords)
+ Assert.assertEquals(3, result.size());
+
+ Assert.assertEquals("赵", result.get(0).getWord()); // results for the same start position are expected shortest first
+ Assert.assertEquals(0, result.get(0).getStartIndex().intValue());
+ Assert.assertEquals(0, result.get(0).getEndIndex().intValue()); // end index is inclusive: a one-character word starts and ends at 0
+
+ Assert.assertEquals("赵阿", result.get(1).getWord());
+ Assert.assertEquals(0, result.get(1).getStartIndex().intValue());
+ Assert.assertEquals(1, result.get(1).getEndIndex().intValue());
+
+ Assert.assertEquals("赵阿三", result.get(2).getWord());
+ Assert.assertEquals(0, result.get(2).getStartIndex().intValue());
+ Assert.assertEquals(2, result.get(2).getEndIndex().intValue());
+ }
+
/**
* 停顿词测试
*/
diff --git a/hutool-dfa/src/test/java/cn/hutool/dfa/SensitiveUtilTest.java b/hutool-dfa/src/test/java/cn/hutool/dfa/SensitiveUtilTest.java
index ba7348b09..5bbbe9f23 100644
--- a/hutool-dfa/src/test/java/cn/hutool/dfa/SensitiveUtilTest.java
+++ b/hutool-dfa/src/test/java/cn/hutool/dfa/SensitiveUtilTest.java
@@ -1,5 +1,7 @@
package cn.hutool.dfa;
+import cn.hutool.core.collection.ListUtil;
+import lombok.Data;
import org.junit.Assert;
import org.junit.Test;
@@ -24,25 +26,17 @@ public class SensitiveUtilTest {
Assert.assertEquals(bean.getStr(), "我有一颗$****,***的");
}
+ @Data
public static class TestBean {
private String str;
private Integer num;
-
- public String getStr() {
- return str;
- }
-
- public void setStr(String str) {
- this.str = str;
- }
-
- public Integer getNum() {
- return num;
- }
-
- public void setNum(Integer num) {
- this.num = num;
- }
}
+ @Test
+ public void issue2126(){ // regression test for issue#2126: nested sensitive words sharing a prefix
+ SensitiveUtil.init(ListUtil.of("赵", "赵阿", "赵阿三"));
+
+ String result = SensitiveUtil.sensitiveFilter("赵阿三在做什么。", true, null); // greedy match with the default processor (null)
+ Assert.assertEquals("***在做什么。", result); // the whole longest word is masked, not just the shortest prefix
+ }
}