fix escape bug

This commit is contained in:
Looly
2021-06-24 07:53:21 +08:00
parent a97e967ecd
commit 949c7a856e
14 changed files with 166 additions and 36 deletions

View File

@@ -22,6 +22,7 @@ import java.util.ArrayList;
import java.util.Collection;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
@@ -576,7 +577,7 @@ public class Convert {
* @param <T> 元素类型
* @param elementType 集合中元素类型
* @param value 被转换的值
* @return {@link List}
* @return {@link ArrayList}
* @since 4.1.20
*/
@SuppressWarnings("unchecked")
@@ -584,6 +585,20 @@ public class Convert {
return (List<T>) toCollection(ArrayList.class, elementType, value);
}
/**
 * Converts the given value to a {@link Set}, requesting {@link HashSet} as the
 * concrete collection type from {@code toCollection}.
 *
 * @param <T>         element type
 * @param elementType class of the elements held by the resulting set
 * @param value       the value to convert
 * @return the converted {@link HashSet}
 * @since 5.7.3
 */
@SuppressWarnings("unchecked")
public static <T> Set<T> toSet(Class<T> elementType, Object value) {
	final Collection<?> converted = toCollection(HashSet.class, elementType, value);
	// Unchecked by necessity: the element type is only known at runtime via elementType.
	return (Set<T>) converted;
}
/**
* 转换为Map
*

View File

@@ -1,7 +1,6 @@
package cn.hutool.core.text.escape;
import cn.hutool.core.text.replacer.LookupReplacer;
import cn.hutool.core.text.replacer.ReplacerChain;
/**
* HTML4的ESCAPE
@@ -10,16 +9,9 @@ import cn.hutool.core.text.replacer.ReplacerChain;
* @author looly
*
*/
public class Html4Escape extends ReplacerChain {
public class Html4Escape extends XmlEscape {
private static final long serialVersionUID = 1L;
protected static final String[][] BASIC_ESCAPE = { //
{ "\"", "&quot;" }, // " - double-quote
{ "&", "&amp;" }, // & - ampersand
{ "<", "&lt;" }, // < - less-than
{ ">", "&gt;" }, // > - greater-than
};
protected static final String[][] ISO8859_1_ESCAPE = { //
{ "\u00A0", "&nbsp;" }, // non-breaking space
{ "\u00A1", "&iexcl;" }, // inverted exclamation mark
@@ -317,7 +309,7 @@ public class Html4Escape extends ReplacerChain {
};
public Html4Escape() {
addChain(new LookupReplacer(BASIC_ESCAPE));
super();
addChain(new LookupReplacer(ISO8859_1_ESCAPE));
addChain(new LookupReplacer(HTML40_EXTENDED_ESCAPE));
}

View File

@@ -1,7 +1,6 @@
package cn.hutool.core.text.escape;
import cn.hutool.core.text.replacer.LookupReplacer;
import cn.hutool.core.text.replacer.ReplacerChain;
/**
* HTML4的UNESCAPE
@@ -9,20 +8,15 @@ import cn.hutool.core.text.replacer.ReplacerChain;
* @author looly
*
*/
public class Html4Unescape extends ReplacerChain {
public class Html4Unescape extends XmlUnescape {
private static final long serialVersionUID = 1L;
protected static final String[][] BASIC_UNESCAPE = InternalEscapeUtil.invert(Html4Escape.BASIC_ESCAPE);
protected static final String[][] ISO8859_1_UNESCAPE = InternalEscapeUtil.invert(Html4Escape.ISO8859_1_ESCAPE);
protected static final String[][] HTML40_EXTENDED_UNESCAPE = InternalEscapeUtil.invert(Html4Escape.HTML40_EXTENDED_ESCAPE);
// issue#1118
protected static final String[][] OTHER_UNESCAPE = new String[][]{new String[]{"&apos;", "'"}};
public Html4Unescape() {
addChain(new LookupReplacer(BASIC_UNESCAPE));
super();
addChain(new LookupReplacer(ISO8859_1_UNESCAPE));
addChain(new LookupReplacer(HTML40_EXTENDED_UNESCAPE));
addChain(new LookupReplacer(OTHER_UNESCAPE));
addChain(new NumericEntityUnescaper());
}
}

View File

@@ -0,0 +1,38 @@
package cn.hutool.core.text.escape;
import cn.hutool.core.text.replacer.LookupReplacer;
import cn.hutool.core.text.replacer.ReplacerChain;
/**
 * Escapes the special characters of XML.<br>
 * See https://stackoverflow.com/questions/1091945/what-characters-do-i-need-to-escape-in-xml-documents<br>
 *
 * <pre>
 * &amp; (ampersand) is replaced with &amp;amp;
 * &lt; (less than) is replaced with &amp;lt;
 * &gt; (greater than) is replaced with &amp;gt;
 * &quot; (double quote) is replaced with &amp;quot;
 * &apos; (single quote / apostrophe) is replaced with &amp;apos;
 * </pre>
 *
 * @author looly
 * @since 5.7.2
 */
public class XmlEscape extends ReplacerChain {
	private static final long serialVersionUID = 1L;

	/**
	 * Lookup table of {@code {character, entity}} pairs for the five characters
	 * listed in the class description.
	 */
	protected static final String[][] BASIC_ESCAPE = new String[][]{
			{"'", "&apos;"}, // ' - single-quote
			{"\"", "&quot;"}, // " - double-quote
			{"&", "&amp;"}, // & - ampersand
			{"<", "&lt;"}, // < - less-than
			{">", "&gt;"}, // > - greater-than
	};

	/**
	 * Creates the escaper and registers a single {@link LookupReplacer} built
	 * from {@link #BASIC_ESCAPE}.
	 */
	public XmlEscape() {
		final LookupReplacer basicReplacer = new LookupReplacer(BASIC_ESCAPE);
		addChain(basicReplacer);
	}
}

View File

@@ -0,0 +1,21 @@
package cn.hutool.core.text.escape;
import cn.hutool.core.text.replacer.LookupReplacer;
import cn.hutool.core.text.replacer.ReplacerChain;
/**
 * Unescapes XML text: the counterpart of {@link XmlEscape}.
 *
 * @author looly
 * @since 5.7.2
 */
public class XmlUnescape extends ReplacerChain {
	private static final long serialVersionUID = 1L;

	/**
	 * Table derived from {@link XmlEscape#BASIC_ESCAPE} through
	 * {@code InternalEscapeUtil.invert} (presumably swapping each pair so that
	 * entities map back to their characters; confirm against that helper).
	 */
	protected static final String[][] BASIC_UNESCAPE = InternalEscapeUtil.invert(XmlEscape.BASIC_ESCAPE);

	/**
	 * Creates the unescaper. Two replacers are registered, in this order: a
	 * {@link LookupReplacer} over {@link #BASIC_UNESCAPE}, then a
	 * {@link NumericEntityUnescaper}.
	 */
	public XmlUnescape() {
		final LookupReplacer entityReplacer = new LookupReplacer(BASIC_UNESCAPE);
		addChain(entityReplacer);

		final NumericEntityUnescaper numericUnescaper = new NumericEntityUnescaper();
		addChain(numericUnescaper);
	}
}

View File

@@ -1,10 +1,10 @@
package cn.hutool.core.text.replacer;
import java.io.Serializable;
import cn.hutool.core.lang.Replacer;
import cn.hutool.core.text.StrBuilder;
import java.io.Serializable;
/**
* 抽象字符串替换类<br>
* 通过实现replace方法实现局部替换逻辑
@@ -28,18 +28,18 @@ public abstract class StrReplacer implements Replacer<CharSequence>, Serializabl
@Override
public CharSequence replace(CharSequence t) {
final int len = t.length();
final StrBuilder strBuillder = StrBuilder.create(len);
final StrBuilder builder = StrBuilder.create(len);
int pos = 0;//当前位置
int consumed;//处理过的字符数
while (pos < len) {
consumed = replace(t, pos, strBuillder);
consumed = replace(t, pos, builder);
if (0 == consumed) {
//0表示未处理或替换任何字符原样输出本字符并从下一个字符继续
strBuillder.append(t.charAt(pos));
builder.append(t.charAt(pos));
pos++;
}
pos += consumed;
}
return strBuillder;
return builder;
}
}

View File

@@ -3,6 +3,8 @@ package cn.hutool.core.util;
import cn.hutool.core.lang.Filter;
import cn.hutool.core.text.escape.Html4Escape;
import cn.hutool.core.text.escape.Html4Unescape;
import cn.hutool.core.text.escape.XmlEscape;
import cn.hutool.core.text.escape.XmlUnescape;
/**
* 转义和反转义工具类Escape / Unescape<br>
@@ -24,6 +26,37 @@ public class EscapeUtil {
|| StrUtil.contains(NOT_ESCAPE_CHARS, c)
);
/**
 * Escapes the special characters of XML.<br>
 * <pre>
 * &amp; (ampersand) is replaced with &amp;amp;
 * &lt; (less than) is replaced with &amp;lt;
 * &gt; (greater than) is replaced with &amp;gt;
 * &quot; (double quote) is replaced with &amp;quot;
 * &apos; (single quote / apostrophe) is replaced with &amp;apos;
 * </pre>
 *
 * @param xml the XML text
 * @return the escaped text
 * @since 5.7.2
 */
public static String escapeXml(CharSequence xml) {
	// A new XmlEscape is built on every call, exactly as before.
	return new XmlEscape().replace(xml).toString();
}
/**
 * Unescapes the special characters of XML by running the text through a
 * newly created {@link XmlUnescape}.
 *
 * @param xml the XML text
 * @return the unescaped text
 * @since 5.7.2
 */
public static String unescapeXml(CharSequence xml) {
	// A new XmlUnescape is built on every call, exactly as before.
	return new XmlUnescape().replace(xml).toString();
}
/**
* 转义HTML4中的特殊字符
*

View File

@@ -986,7 +986,7 @@ public class XmlUtil {
* @since 4.0.8
*/
public static String escape(String string) {
return EscapeUtil.escape(string);
return EscapeUtil.escapeHtml4(string);
}
/**
@@ -998,7 +998,7 @@ public class XmlUtil {
* @since 5.0.6
*/
public static String unescape(String string) {
return EscapeUtil.unescape(string);
return EscapeUtil.unescapeHtml4(string);
}
/**

View File

@@ -4,12 +4,12 @@ import org.junit.Assert;
import org.junit.Test;
public class EscapeUtilTest {
@Test
public void escapeHtml4Test() {
String escapeHtml4 = EscapeUtil.escapeHtml4("<a>你好</a>");
Assert.assertEquals("&lt;a&gt;你好&lt;/a&gt;", escapeHtml4);
String result = EscapeUtil.unescapeHtml4("&#25391;&#33633;&#22120;&#31867;&#22411;");
Assert.assertEquals("振荡器类型", result);
@@ -39,9 +39,9 @@ public class EscapeUtilTest {
}
@Test
public void escapeSinleQuotesTest(){
public void escapeSingleQuotesTest(){
String str = "'some text with single quotes'";
final String s = EscapeUtil.escapeHtml4(str);
Assert.assertEquals(str, s);
Assert.assertEquals("&apos;some text with single quotes&apos;", s);
}
}

View File

@@ -287,4 +287,11 @@ public class XmlUtilTest {
String format = XmlUtil.toStr(xml,"GBK",true);
Console.log(format);
}
/**
 * Verifies that {@code XmlUtil.escape} turns the angle brackets into their
 * named entities. The previous version only logged the result and asserted
 * nothing, so it could never fail.
 */
@Test
public void escapeTest(){
	final String escaped = XmlUtil.escape("<>");
	// Fully qualified so the test does not depend on this file's import list.
	org.junit.Assert.assertEquals("&lt;&gt;", escaped);
}
}