修复HtmlUtil的removeHtmlAttr方法匹配问题

This commit is contained in:
Looly
2023-04-25 16:14:22 +08:00
parent 9b8f0a0ed4
commit acb40326f6
3 changed files with 22 additions and 2 deletions

View File

@@ -155,7 +155,15 @@ public class HtmlUtil {
// [^>]+? 属性值,至少有一个非>的字符,>表示标签结束
// \s+(?=>) 表示属性值后跟空格加>,即末尾的属性,此时去掉空格
// (?=\s|>) 表示属性值后跟空格(属性后还有别的属性)或者跟>(最后一个属性)
regex = StrUtil.format("(?i)(\\s*{}\\s*=[^>]+?\\s+(?=>))|(\\s*{}\\s*=[^>]+?(?=\\s|>))", attr, attr);
regex = StrUtil.format("(?i)(\\s*{}\\s*=\\s*)" +
"(" +
// name="xxxx"
"([\"][^\"]+?[\"]\\s*)|" +
// name=xxx >
"([^>]+?\\s+(?=>))|" +
// name=xxx> 或者 name=xxx name2=xxx
"([^>]+?(?=\\s|>))" +
")", attr);
content = content.replaceAll(regex, StrUtil.EMPTY);
}
return content;

View File

@@ -181,4 +181,15 @@ public class HtmlUtilTest {
final String result = HtmlUtil.removeAllHtmlAttr(html, "div");
Assert.assertEquals("<div></div>", result);
}
/**
 * Test named after issue I6YNTF: checks that {@code HtmlUtil.removeHtmlAttr} strips the
 * {@code class} attribute both when its value is double-quoted and contains a space, and
 * when the value is unquoted.
 */
@Test
public void issueI6YNTFTest() {
	// Both inputs are expected to reduce to the same attribute-free markup.
	final String expected = "<html><body><div>hello world</div></body></html>";

	// Case 1: double-quoted value with an embedded space.
	final String quotedHtml = "<html><body><div class=\"a1 a2\">hello world</div></body></html>";
	Assert.assertEquals(expected, HtmlUtil.removeHtmlAttr(quotedHtml, "class"));

	// Case 2: unquoted value directly followed by the closing '>'.
	final String unquotedHtml = "<html><body><div class=a1>hello world</div></body></html>";
	Assert.assertEquals(expected, HtmlUtil.removeHtmlAttr(unquotedHtml, "class"));
}
}