From 923aa1cf21dbdce38cb32fd751ce8ed0f4bfc82a Mon Sep 17 00:00:00 2001
From: Looly
Date: Mon, 17 Apr 2023 23:49:31 +0800
Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E5=A4=8DURLUtil.decode=E6=97=A0?=
 =?UTF-8?q?=E6=B3=95=E8=A7=A3=E7=A0=81UTF-16=E9=97=AE=E9=A2=98?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
Review notes (text between the separator above and the first file diff is not part of the commit):
- decode(): a null charset returns the input unchanged again, as the removed code did;
  the posted version could reach new String(bytes, null).
- decode(): a literal '+' now honours isPlusToSpace; it is never part of the run handed
  to decodeSub, so the flag had no effect in the posted version.
- NOTE(review): literal hex-digit characters (e.g. "abc") are still decoded through
  `charset`; with a charset that is not ASCII-compatible, such as UTF-16, they presumably
  do not round-trip. Confirm, and consider decoding only %XX sequences.
- The post-image blob ids on the "index" lines are carried over from the posted patch
  and no longer match the revised content.

 CHANGELOG.md                                  |  3 +-
 .../java/cn/hutool/core/net/URLDecoder.java   | 60 ++++++++++++++++++-
 .../cn/hutool/core/net/UrlDecoderTest.java    | 37 ++++++++++++
 3 files changed, 97 insertions(+), 3 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 45927ec92..6c3a8d4cd 100755
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,11 +2,12 @@
 # 🚀Changelog
 
 -------------------------------------------------------------------------------------------------------------
-# 5.8.19.M1 (2023-04-16)
+# 5.8.19.M1 (2023-04-17)
 
 ### 🐣新特性
 
 ### 🐞Bug修复
+* 【core 】 修复URLUtil.decode无法解码UTF-16问题(issue#3063@Github)
 
 -------------------------------------------------------------------------------------------------------------
 # 5.8.18 (2023-04-16)
diff --git a/hutool-core/src/main/java/cn/hutool/core/net/URLDecoder.java b/hutool-core/src/main/java/cn/hutool/core/net/URLDecoder.java
index 106d973e4..3ed6ef965 100644
--- a/hutool-core/src/main/java/cn/hutool/core/net/URLDecoder.java
+++ b/hutool-core/src/main/java/cn/hutool/core/net/URLDecoder.java
@@ -1,6 +1,7 @@
 package cn.hutool.core.net;
 
 import cn.hutool.core.util.CharUtil;
+import cn.hutool.core.util.CharsetUtil;
 import cn.hutool.core.util.StrUtil;
 
 import java.io.ByteArrayOutputStream;
@@ -71,10 +72,47 @@ public class URLDecoder implements Serializable {
 	 * @return 解码后的字符串
 	 */
 	public static String decode(String str, Charset charset, boolean isPlusToSpace) {
-		if(null == charset){
+		// Without input or a target charset there is nothing to decode; return the argument as-is.
+		if (null == str || null == charset) {
 			return str;
 		}
-		return StrUtil.str(decode(StrUtil.bytes(str, charset), isPlusToSpace), charset);
+		final int length = str.length();
+		if (0 == length) {
+			return StrUtil.EMPTY;
+		}
+
+		final StringBuilder result = new StringBuilder(length / 3);
+
+		// Start of the pending run of '%' / hex characters that has not been decoded yet.
+		int begin = 0;
+		char c;
+		for (int i = 0; i < length; i++) {
+			c = str.charAt(i);
+			if (ESCAPE_CHAR == c || CharUtil.isHexChar(c)) {
+				continue;
+			}
+
+			// A character outside the escape alphabet ends the pending run: decode that run first.
+			if (i > begin) {
+				result.append(decodeSub(str, begin, i, charset, isPlusToSpace));
+			}
+
+			// The run passed to decodeSub never contains '+', so the conversion has to happen here.
+			if ('+' == c && isPlusToSpace) {
+				c = ' ';
+			}
+
+			// Everything else is copied through literally.
+			result.append(c);
+			begin = i + 1;
+		}
+
+		// Decode whatever run is still pending at the end of the input.
+		if (begin < length) {
+			result.append(decodeSub(str, begin, length, charset, isPlusToSpace));
+		}
+
+		return result.toString();
 	}
 
 	/**
@@ -135,4 +173,22 @@ public class URLDecoder implements Serializable {
 		}
 		return buffer.toByteArray();
 	}
+
+	/**
+	 * Decodes one run of escape characters taken from {@code str}.
+	 *
+	 * @param str           source string
+	 * @param begin         start index (inclusive)
+	 * @param end           end index (exclusive)
+	 * @param charset       charset used to turn the decoded bytes into text
+	 * @param isPlusToSpace whether '+' is converted to a space
+	 * @return the decoded text
+	 */
+	private static String decodeSub(final String str, final int begin, final int end,
+									final Charset charset, final boolean isPlusToSpace){
+		return new String(decode(
+			// decode() passes only '%' and hex characters here, so ISO-8859-1 maps each char to one byte
+			str.substring(begin, end).getBytes(CharsetUtil.CHARSET_ISO_8859_1), isPlusToSpace
+		), charset);
+	}
 }
diff --git a/hutool-core/src/test/java/cn/hutool/core/net/UrlDecoderTest.java b/hutool-core/src/test/java/cn/hutool/core/net/UrlDecoderTest.java
index 906b5784a..42d3fa6f0 100644
--- a/hutool-core/src/test/java/cn/hutool/core/net/UrlDecoderTest.java
+++ b/hutool-core/src/test/java/cn/hutool/core/net/UrlDecoderTest.java
@@ -1,12 +1,49 @@
 package cn.hutool.core.net;
 
 import cn.hutool.core.util.CharsetUtil;
+import cn.hutool.core.util.URLUtil;
 import org.junit.Assert;
 import org.junit.Test;
 
+import java.io.UnsupportedEncodingException;
+import java.nio.charset.StandardCharsets;
+
 public class UrlDecoderTest {
 	@Test
 	public void decodeForPathTest(){
 		Assert.assertEquals("+", URLDecoder.decodeForPath("+", CharsetUtil.CHARSET_UTF_8));
 	}
+
+	@Test
+	public void issue3063Test() throws UnsupportedEncodingException {
+		// https://github.com/dromara/hutool/issues/3063
+
+		final String s = "测试";
+		final String expectedDecode = "%FE%FF%6D%4B%8B%D5";
+
+		final String s1 = URLUtil.encode(s, StandardCharsets.UTF_16);
+		Assert.assertEquals(expectedDecode, s1);
+		final String s2 = java.net.URLEncoder.encode(s, "UTF-16");
+		Assert.assertEquals(expectedDecode, s2);
+
+		final String decode = URLDecoder.decode(s1, StandardCharsets.UTF_16);
+		Assert.assertEquals(s, decode);
+
+		// Mix of an encoded part and a literal, unencoded part
+		final String mixDecoded = expectedDecode + "你好";
+		final String decode2 = URLDecoder.decode(mixDecoded, StandardCharsets.UTF_16);
+		Assert.assertEquals("测试你好", decode2);
+
+		Assert.assertEquals(
+				java.net.URLDecoder.decode(mixDecoded, "UTF-16"),
+				URLDecoder.decode(mixDecoded, StandardCharsets.UTF_16)
+		);
+	}
+
+	@Test
+	public void decodePlusToSpaceTest() {
+		// A literal '+' becomes a space only when isPlusToSpace is enabled
+		Assert.assertEquals("a b", URLDecoder.decode("a+b", CharsetUtil.CHARSET_UTF_8, true));
+		Assert.assertEquals("a+b", URLDecoder.decode("a+b", CharsetUtil.CHARSET_UTF_8, false));
+	}
 }