diff --git a/hutool-core/src/main/java/cn/hutool/core/text/dfa/NFA.java b/hutool-core/src/main/java/cn/hutool/core/text/dfa/NFA.java
index 468658a62..3d791679c 100644
--- a/hutool-core/src/main/java/cn/hutool/core/text/dfa/NFA.java
+++ b/hutool-core/src/main/java/cn/hutool/core/text/dfa/NFA.java
@@ -4,142 +4,186 @@ import java.util.*;
/**
*
- *
* 基于非确定性有穷自动机(NFA) 实现的多模匹配工具
+ *
*
* @author renyp
*/
public class NFA {
- private final Node root;
+ /**
+ * Root node of the keyword trie on which the AC automaton is built.
+ */
+ private final Node root;
+ /**
+ * Whether the fail links still have to be (re)built: set to true by the constructor and by insert(), cleared at the end of buildAc().
+ */
+ private volatile boolean needBuildAc;
- /**
- * 默认构造
- */
- public NFA() {
- this.root = new Node();
- }
+ /**
+ * Monitor held by find() around the buildAc() call so that concurrent searches do not build the automaton in parallel. NOTE(review): insert() synchronizes on insertTreeLock instead, so this lock alone does not stop an insert from running during a build - confirm this is intended.
+ */
+ private final Object buildAcLock;
- /**
- * 构造函数 并 初始化词库
- *
- * @param words 添加的新词
- */
- public NFA(final String... words) {
- this();
- this.insert(words);
- }
+ /**
+ * Monitor held by insert() so that concurrent insertions cannot tamper with a newly created node before it is attached to the trie.
+ */
+ private final Object insertTreeLock;
- /**
- * 词库添加新词,初始化查找树
- *
- * @param word 添加的新词
- */
- public void insert(final String word) {
- Node p = root;
- for (final char curr : word.toCharArray()) {
- if (p.next.get((int) curr) == null) {
- p.next.put((int) curr, new Node());
- }
- p = p.next.get((int) curr);
- }
- p.flag = true;
- p.str = word;
- }
+ /**
+ * Default constructor: creates an empty root node, marks the automaton as needing a build, and allocates both lock objects.
+ */
+ public NFA() {
+ this.root = new Node();
+ this.needBuildAc = true;
+ this.buildAcLock = new Object();
+ this.insertTreeLock = new Object();
+ }
- /**
- * 词库批量添加新词,初始化查找树
- *
- * @param words 添加的新词
- */
- public void insert(final String... words) {
- for (final String word : words) {
- this.insert(word);
- }
- }
+ /**
+ * Creates a matcher and loads the given keywords into it.
+ *
+ * @param words keywords to add
+ */
+ public NFA(String... words) {
+ this();
+ this.insert(words);
+ }
- /**
- * 构建基于NFA模型的 AC自动机
- */
- public void buildAc() {
- final Queue queue = new LinkedList<>();
- final Node p = root;
- for (final Integer key : p.next.keySet()) {
- p.next.get(key).fail = root;
- queue.offer(p.next.get(key));
- }
- while (!queue.isEmpty()) {
- final Node curr = queue.poll();
- for (final Integer key : curr.next.keySet()) {
- Node fail = curr.fail;
- // 查找当前节点匹配失败,他对应等效匹配的节点是哪个
- while (fail != null && fail.next.get(key) == null) {
- fail = fail.fail;
- }
- // 代码到这,有两种可能,fail不为null,说明找到了fail;fail为null,没有找到,那么就把fail指向root节点(当到该节点匹配失败,那么从root节点开始重新匹配)
- if (fail != null) {
- fail = fail.next.get(key);
- } else {
- fail = root;
- }
- curr.next.get(key).fail = fail;
- queue.offer(curr.next.get(key));
- }
- }
- }
+ /**
+ * Adds one keyword to the trie, creating missing child nodes character by character, and marks the automaton as needing a rebuild. NOTE(review): needBuildAc is raised before the new nodes are attached, and find() builds under a different lock, so a build could clear the flag while this insert is still running - confirm.
+ *
+ * @param word keyword to add
+ */
+ public void insert(String word) {
+ synchronized (insertTreeLock) {
+ needBuildAc = true;
+ Node p = root;
+ for (char curr : word.toCharArray()) {
+ int ind = curr;
+ if (p.next.get(ind) == null) {
+ p.next.put(ind, new Node());
+ }
+ p = p.next.get(ind);
+ }
+ p.flag = true;
+ p.str = word;
+ }
+ }
- /**
- * @param text 查询的文本(母串)
- * @return 关键字列表
- */
- public List find(final String text) {
- return this.find(text, true);
- }
+ /**
+ * Adds several keywords to the trie by calling insert(String) once per element.
+ *
+ * @param words keywords to add
+ */
+ public void insert(String... words) {
+ for (String word : words) {
+ this.insert(word);
+ }
+ }
- /**
- * @param text 查找的文本(母串)
- * @param isDensityMatch 是否密集匹配
- * @return 关键字列表
- */
- public List find(final String text, final boolean isDensityMatch) {
- final List ans = new ArrayList<>();
- Node p = root, k;
- for (int i = 0, len = text.length(); i < len; i++) {
- final int ind = text.charAt(i);
- // 状态转移(沿着fail指针链接的链表,此处区别于DFA模型)
- while (p != null && p.next.get(ind) == null) {
- p = p.fail;
- }
- if (p == null) {
- p = root;
- } else {
- p = p.next.get(ind);
- }
- // 提取结果(沿着fail指针链接的链表,此处区别于DFA模型)
- k = p;
- while (k != null) {
- if (k.flag) {
- ans.add(new FoundWord(k.str, k.str, i - k.str.length() + 1, i));
- if (!isDensityMatch) {
- p = root;
- break;
- }
- }
- k = k.fail;
- }
- }
- return ans;
- }
+ /**
+ * Builds the NFA-based AC automaton: walks the trie breadth-first from the root's children, assigns the fail link of every non-root node, then clears needBuildAc.
+ */
+ private void buildAc() {
+ Queue queue = new LinkedList<>();
+ Node p = root;
+ for (Integer key : p.next.keySet()) {
+ p.next.get(key).fail = root;
+ queue.offer(p.next.get(key));
+ }
+ while (!queue.isEmpty()) {
+ Node curr = queue.poll();
+ for (Integer key : curr.next.keySet()) {
+ Node fail = curr.fail;
+ // Walk the fail chain to find the node that can act as the equivalent fallback when matching fails on this child
+ while (fail != null && fail.next.get(key) == null) {
+ fail = fail.fail;
+ }
+ // Two outcomes here: fail != null means a fallback with this transition exists; fail == null means none was found, so the child falls back to root (matching restarts from root when it fails at that node)
+ if (fail != null) {
+ fail = fail.next.get(key);
+ } else {
+ fail = root;
+ }
+ curr.next.get(key).fail = fail;
+ queue.offer(curr.next.get(key));
+ }
+ }
+ needBuildAc = false;
+ }
- private static class Node {
+ /**
+ * @param text text to search (the haystack); forwarded to find(text, true), i.e. dense matching is enabled
+ */
+ public List find(String text) {
+ return this.find(text, true);
+ }
- boolean flag;
- Node fail;
- String str;
- Map next;
+ /**
+ * @param text text to search (the haystack)
+ * @param isDensityMatch whether to match densely; when false, the fail-chain scan stops at the first keyword found for the current position and matching restarts from root
+ */
+ public List find(String text, boolean isDensityMatch) {
+ // Double check: the flag is tested again inside the lock so that concurrent callers do not repeat the build needlessly
+ if (needBuildAc) {
+ synchronized (buildAcLock) {
+ if (needBuildAc) {
+ this.buildAc();
+ }
+ }
+ }
+ List ans = new ArrayList<>();
+ Node p = root, k = null;
+ for (int i = 0, len = text.length(); i < len; i++) {
+ int ind = text.charAt(i);
+ // State transition (follows the chain of fail links; this is where it differs from the DFA model)
+ while (p != null && p.next.get(ind) == null) {
+ p = p.fail;
+ }
+ if (p == null) {
+ p = root;
+ } else {
+ p = p.next.get(ind);
+ }
+ // Collect results (follows the chain of fail links; this is where it differs from the DFA model)
+ k = p;
+ while (k != null) {
+ if (k.flag) {
+ ans.add(new FoundWord(k.str, k.str, i - k.str.length() + 1, i));
+ if (!isDensityMatch) {
+ p = root;
+ break;
+ }
+ }
+ k = k.fail;
+ }
+ }
+ return ans;
+ }
- public Node() {
- this.flag = false;
- next = new HashMap<>();
- }
- }
+
+ private static class Node {
+
+ /**
+ * Whether this node is the end of a keyword.
+ */
+ boolean flag;
+ /**
+ * Node to jump to next when matching fails at this node.
+ */
+ Node fail;
+ /**
+ * The keyword that ends at this node.
+ */
+ String str;
+ /**
+ * Child nodes of this node; insert() keys them by the character's int value.
+ */
+ Map next;
+
+ public Node() {
+ this.flag = false;
+ next = new HashMap<>();
+ }
+ }
}
diff --git a/hutool-core/src/test/java/cn/hutool/core/text/dfa/NFATest.java b/hutool-core/src/test/java/cn/hutool/core/text/dfa/NFATest.java
index e38be87f3..978d1ee23 100644
--- a/hutool-core/src/test/java/cn/hutool/core/text/dfa/NFATest.java
+++ b/hutool-core/src/test/java/cn/hutool/core/text/dfa/NFATest.java
@@ -16,7 +16,7 @@ public class NFATest {
public void testFind() {
final NFA NFA = new NFA();
NFA.insert("say", "her", "he", "she", "shr");
- NFA.buildAc();
+// No explicit buildAc() call: the method is private now and find() triggers the build itself when needed.
final WordTree wordTree = new WordTree();
wordTree.addWords("say", "her", "he", "she", "shr");
@@ -53,7 +53,7 @@ public class NFATest {
public void testFindNotDensity() {
final NFA NFA = new NFA();
NFA.insert("say", "her", "he", "she", "shr");
- NFA.buildAc();
+// No explicit buildAc() call: the method is private now and find() triggers the build itself when needed.
final WordTree wordTree = new WordTree();
wordTree.addWords("say", "her", "he", "she", "shr");
@@ -89,7 +89,7 @@ public class NFATest {
stopWatch.start("automaton_char_buid_find");
final NFA NFALocal = new NFA();
NFALocal.insert("say", "her", "he", "she", "shr");
- NFALocal.buildAc();
+// No explicit buildAc() call: the method is private now and find() triggers the build itself when needed.
final List ans1 = NFALocal.find(input);
stopWatch.stop();
@@ -124,7 +124,7 @@ public class NFATest {
stopWatch.start("automaton_cn_build_find");
final NFA NFALocal = new NFA();
NFALocal.insert("赵", "赵啊", "赵啊三");
- NFALocal.buildAc();
+// No explicit buildAc() call: the method is private now and find() triggers the build itself when needed.
final List result = NFALocal.find(input);
stopWatch.stop();
@@ -161,7 +161,7 @@ public class NFATest {
final NFA NFALocal = new NFA();
NFALocal.insert("赵", "赵啊", "赵啊三");
- NFALocal.buildAc();
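+// No explicit buildAc() call: find() now builds the automaton itself, so the build cost falls inside the timed find() below.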
stopWatch.start("automaton_cn_find");
final List result = NFALocal.find(input);
@@ -200,7 +200,7 @@ public class NFATest {
final NFA NFALocal = new NFA();
NFALocal.insert("赵", "赵啊", "赵啊三");
- NFALocal.buildAc();
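+// No explicit buildAc() call: find() now builds the automaton itself, so the build cost falls inside the timed find() below.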
stopWatch.start("automaton_cn_find_not_density");
final List result = NFALocal.find(input, false);