This commit is contained in:
Looly
2023-04-12 17:23:09 +08:00
parent a96a95c988
commit a7c18e81f6
38 changed files with 426 additions and 175 deletions

View File

@@ -140,15 +140,35 @@ public class TextSimilarity {
/**
* 求公共子串,采用动态规划算法。 其不要求所求得的字符在所给的字符串中是连续的。
* 2023-04-06 优化堆内存占用此处不需要matrix[m][n]的完整矩阵,仅需右下角值
*
* @param strA 字符串1
* @param strB 字符串2
* @return 公共子串
*/
private static int longestCommonSubstringLength(final String strA, final String strB) {
public static int longestCommonSubstringLength(final String strA, final String strB) {
final int m = strA.length();
final int n = strB.length();
return generateMatrix(strA, strB)[m][n];
// 初始化矩阵数据,matrix[0][0]的值为0 如果字符数组chars_strA和chars_strB的对应位相同则matrix[i][j]的值为左上角的值加1
// 否则matrix[i][j]的值等于左上方最近两个位置的较大值, 矩阵中其余各点的值为0.
int[] lastLine = new int[n + 1];
int[] currLine = new int[n + 1];
int[] temp;
for (int i = 1; i <= m; i++) {
for (int j = 1; j <= n; j++) {
if (strA.charAt(i - 1) == strB.charAt(j - 1)) {
currLine[j] = lastLine[j-1] + 1;
} else {
currLine[j] = Math.max(currLine[j-1], lastLine[j]);
}
}
temp = lastLine;
lastLine = currLine;
currLine = temp;
}
return lastLine[n];
}
/**

View File

@@ -1,12 +1,15 @@
package org.dromara.hutool.core.text;
import org.dromara.hutool.core.lang.Console;
import org.dromara.hutool.core.util.RandomUtil;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
/**
 * Unit tests for the text similarity utility class.
 *
 * @author looly
 */
public class TextSimilarityTest {
@@ -35,8 +38,20 @@ public class TextSimilarityTest {
}
@Test
public void similarTest(){
public void similarTest() {
final double abd = TextSimilarity.similar("abd", "1111");
Assertions.assertEquals(0, abd, 1);
}
/**
 * Large-input scenario for {@code TextSimilarity.longestCommonSubstringLength}
 * (see https://github.com/dromara/hutool/issues/3045).
 * Disabled by default; both inputs are a random prefix followed by the same random suffix.
 */
@Test
@Disabled
void longestCommonSubstringLengthTest() {
    // https://github.com/dromara/hutool/issues/3045
    final String strCommon = RandomUtil.randomString(1024 * 32);
    final String strA = RandomUtil.randomString(1024 * 32) + strCommon;
    final String strB = RandomUtil.randomString(1024 * 32) + strCommon;
    final int i = TextSimilarity.longestCommonSubstringLength(strA, strB);
    Console.log(i);
    // Both inputs end with strCommon, so the common subsequence is at least that long.
    Assertions.assertTrue(i >= strCommon.length());
}
}