This commit is contained in:
Looly
2023-04-25 17:57:23 +08:00
parent bd97a73cb9
commit a6f01e75c6
7 changed files with 133 additions and 10 deletions

View File

@@ -33,7 +33,7 @@ public class ClientEngineFactory {
* @return 单例的ClientEngine
*/
public static ClientEngine getEngine() {
// NOTE(review): this chunk looks like a rendered diff with its +/- markers stripped, so the two
// return statements below are presumably the before/after versions of one line — confirm against
// the repository before treating them as sequential code.
// Variant 1: the supplier passed to Singleton.get is a reference to getEngine itself,
// i.e. the very method that is making this call.
return Singleton.get(ClientEngine.class.getName(), ClientEngineFactory::getEngine);
// Variant 2: identical call, except the supplier is a reference to createEngine.
// Both variants pass ClientEngine's class name as the first argument.
return Singleton.get(ClientEngine.class.getName(), ClientEngineFactory::createEngine);
}
/**

View File

@@ -186,7 +186,15 @@ public class HtmlUtil {
// [^>]+? 属性值,至少有一个非>的字符,>表示标签结束
// \s+(?=>) 表示属性值后跟空格加>,即末尾的属性,此时去掉空格
// (?=\s|>) 表示属性值后跟空格(属性后还有别的属性)或者跟>(最后一个属性)
regex = StrUtil.format("(?i)(\\s*{}\\s*=[^>]+?\\s+(?=>))|(\\s*{}\\s*=[^>]+?(?=\\s|>))", attr, attr);
regex = StrUtil.format("(?i)(\\s*{}\\s*=\\s*)" +
"(" +
// name="xxxx"
"([\"][^\"]+?[\"]\\s*)|" +
// name=xxx >
"([^>]+?\\s+(?=>))|" +
// name=xxx> 或者 name=xxx name2=xxx
"([^>]+?(?=\\s|>))" +
")", attr);
content = content.replaceAll(regex, StrUtil.EMPTY);
}
return content;

View File

@@ -199,4 +199,15 @@ public class HtmlUtilTest {
charsetName = ReUtil.get(HtmlUtil.META_CHARSET_PATTERN, "<meta charset = \"utf-8\"", 1);
Assertions.assertEquals("utf-8", charsetName);
}
/**
 * Checks that {@code HtmlUtil.removeHtmlAttr} strips a {@code class} attribute from a
 * {@code div} both when the value is double-quoted and contains a space and when the value
 * is unquoted. The method name suggests this covers issue I6YNTF.
 */
@Test
void issueI6YNTFTest() {
	// Both inputs must collapse to the same attribute-free markup.
	final String expected = "<html><body><div>hello world</div></body></html>";

	// Double-quoted value with an embedded space.
	final String quotedInput = "<html><body><div class=\"a1 a2\">hello world</div></body></html>";
	Assertions.assertEquals(expected, HtmlUtil.removeHtmlAttr(quotedInput, "class"));

	// Unquoted value immediately followed by the closing '>'.
	final String unquotedInput = "<html><body><div class=a1>hello world</div></body></html>";
	Assertions.assertEquals(expected, HtmlUtil.removeHtmlAttr(unquotedInput, "class"));
}
}