diff --git a/CHANGELOG.md b/CHANGELOG.md index a296c5c98..c824818a3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,7 @@ * 【core 】 修复Bcrypt不支持$2y$盐前缀问题(pr#1560@Github) * 【system 】 修复isWindows8拼写问题(pr#1557@Github) * 【db 】 修复MongoDS默认分组参数失效问题(issue#1548@Github) +* 【core 】 修复UrlPath编码的字符问题导致的URL编码异常(issue#1537@Github) ------------------------------------------------------------------------------------------------------------- diff --git a/hutool-core/src/main/java/cn/hutool/core/net/URLDecoder.java b/hutool-core/src/main/java/cn/hutool/core/net/URLDecoder.java index 19e0dd2ff..8ae9bc59f 100644 --- a/hutool-core/src/main/java/cn/hutool/core/net/URLDecoder.java +++ b/hutool-core/src/main/java/cn/hutool/core/net/URLDecoder.java @@ -25,11 +25,12 @@ public class URLDecoder implements Serializable { /** * 解码,不对+解码 - *
-	 *   1. 将%20转换为空格 ;
-	 *   2. 将"%xy"转换为文本形式,xy是两位16进制的数值;
-	 *   3. 跳过不符合规范的%形式,直接输出
-	 * 
+ * + *
    + *
  1. 将%20转换为空格
  2. + *
  3. 将 "%xy"转换为文本形式,xy是两位16进制的数值
  4. + *
  5. 跳过不符合规范的%形式,直接输出
  6. + *
* * @param str 包含URL编码后的字符串 * @param charset 编码 diff --git a/hutool-core/src/main/java/cn/hutool/core/net/URLEncoder.java b/hutool-core/src/main/java/cn/hutool/core/net/URLEncoder.java index cd71547a3..b45316b41 100644 --- a/hutool-core/src/main/java/cn/hutool/core/net/URLEncoder.java +++ b/hutool-core/src/main/java/cn/hutool/core/net/URLEncoder.java @@ -30,13 +30,25 @@ public class URLEncoder implements Serializable { * 默认的编码器针对URI路径编码,定义如下: * *
-	 * pchar = unreserved(不处理) / pct-encoded / sub-delims(子分隔符) / ":" / "@"
+	 * pchar = unreserved(不处理) / pct-encoded / sub-delims(子分隔符) / ":" / "@" / "/"
 	 * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
 	 * sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
 	 * 
*/ public static final URLEncoder DEFAULT = createDefault(); + /** + * URL的Path的每一个Segment URLEncoder
+ * 默认的编码器针对URI路径的每一段编码,定义如下: + * + *
+	 * pchar = unreserved(不处理) / pct-encoded / sub-delims(子分隔符) / "@"
+	 * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
+	 * sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
+	 * 
+ */ + public static final URLEncoder PATH_SEGMENT = createPathSegment(); + /** * 用于查询语句的URLEncoder
* 编码器针对URI路径编码,定义如下: @@ -55,10 +67,10 @@ public class URLEncoder implements Serializable { /** * 全编码的URLEncoder
*
-	 * 	 0x2A, 0x2D, 0x2E, 0x30 to 0x39, 0x41 to 0x5A, 0x5F, 0x61 to 0x7A as-is
-	 * 	 '*', '-', '.', '0' to '9', 'A' to 'Z', '_', 'a' to 'z' 不编码
-	 * 	 其它编码为 %nn 形式
-	 * 	 
+ * 0x2A, 0x2D, 0x2E, 0x30 to 0x39, 0x41 to 0x5A, 0x5F, 0x61 to 0x7A as-is + * '*', '-', '.', '0' to '9', 'A' to 'Z', '_', 'a' to 'z' 不编码 + * 其它编码为 %nn 形式 + * */ public static final URLEncoder ALL = createAll(); @@ -67,7 +79,7 @@ public class URLEncoder implements Serializable { * 默认的编码器针对URI路径编码,定义如下: * *
-	 * pchar = unreserved(不处理) / pct-encoded / sub-delims(子分隔符) / ":" / "@"
+	 * pchar = unreserved(不处理) / pct-encoded / sub-delims(子分隔符) / ":" / "@" / "/"
 	 * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
 	 * sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
 	 * 
@@ -101,6 +113,42 @@ public class URLEncoder implements Serializable { return encoder; } + /** + * URL的Path的每一个Segment URLEncoder
+ * 默认的编码器针对URI路径的每一段编码,定义如下: + * + *
+	 * pchar = unreserved(不处理) / pct-encoded / sub-delims(子分隔符) / "@"
+	 * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
+	 * sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
+	 * 
+ * + * @return URLEncoder + */ + public static URLEncoder createPathSegment() { + final URLEncoder encoder = new URLEncoder(); + encoder.addSafeCharacter('-'); + encoder.addSafeCharacter('.'); + encoder.addSafeCharacter('_'); + encoder.addSafeCharacter('~'); + // Add the sub-delims + encoder.addSafeCharacter('!'); + encoder.addSafeCharacter('$'); + encoder.addSafeCharacter('&'); + encoder.addSafeCharacter('\''); + encoder.addSafeCharacter('('); + encoder.addSafeCharacter(')'); + encoder.addSafeCharacter('*'); + encoder.addSafeCharacter('+'); + encoder.addSafeCharacter(','); + encoder.addSafeCharacter(';'); + encoder.addSafeCharacter('='); + // Add the remaining literals + encoder.addSafeCharacter('@'); + + return encoder; + } + /** * 创建用于查询语句的URLEncoder
* 编码器针对URI路径编码,定义如下: diff --git a/hutool-core/src/main/java/cn/hutool/core/net/url/UrlPath.java b/hutool-core/src/main/java/cn/hutool/core/net/url/UrlPath.java index 748275e04..66fc70109 100644 --- a/hutool-core/src/main/java/cn/hutool/core/net/url/UrlPath.java +++ b/hutool-core/src/main/java/cn/hutool/core/net/url/UrlPath.java @@ -127,7 +127,7 @@ public class UrlPath { final StringBuilder builder = new StringBuilder(); for (String segment : segments) { - builder.append(CharUtil.SLASH).append(URLUtil.encode(segment, charset)); + builder.append(CharUtil.SLASH).append(URLUtil.encodePathSegment(segment, charset)); } if (withEngTag || StrUtil.isEmpty(builder)) { builder.append(CharUtil.SLASH); diff --git a/hutool-core/src/main/java/cn/hutool/core/util/URLUtil.java b/hutool-core/src/main/java/cn/hutool/core/util/URLUtil.java index 78c7df9eb..73ac980b7 100644 --- a/hutool-core/src/main/java/cn/hutool/core/util/URLUtil.java +++ b/hutool-core/src/main/java/cn/hutool/core/util/URLUtil.java @@ -353,20 +353,6 @@ public class URLUtil { return encode(url, CharsetUtil.CHARSET_UTF_8); } - /** - * 编码URL,默认使用UTF-8编码
- * 将需要转换的内容(ASCII码形式之外的内容),用十六进制表示法转换出来,并在之前加上%开头。
- * 此方法用于POST请求中的请求体自动编码,转义大部分特殊字符 - * - * @param url URL - * @return 编码后的URL - * @throws UtilException UnsupportedEncodingException - * @since 3.1.2 - */ - public static String encodeQuery(String url) throws UtilException { - return encodeQuery(url, CharsetUtil.CHARSET_UTF_8); - } - /** * 编码字符为 application/x-www-form-urlencoded
* 将需要转换的内容(ASCII码形式之外的内容),用十六进制表示法转换出来,并在之前加上%开头。
@@ -387,6 +373,39 @@ public class URLUtil { return URLEncoder.DEFAULT.encode(url, charset); } + /** + * 编码URL字符为 application/x-www-form-urlencoded
+ * 将需要转换的内容(ASCII码形式之外的内容),用十六进制表示法转换出来,并在之前加上%开头。
+ * 此方法用于URL自动编码,类似于浏览器中键入地址自动编码,对于像类似于“/”的字符不再编码 + * + * @param url URL + * @param charset 编码 + * @return 编码后的URL + * @throws UtilException UnsupportedEncodingException + * @deprecated 请使用 {@link #encode(String, Charset)} + */ + @Deprecated + public static String encode(String url, String charset) throws UtilException { + if (StrUtil.isEmpty(url)) { + return url; + } + return encode(url, StrUtil.isBlank(charset) ? CharsetUtil.defaultCharset() : CharsetUtil.charset(charset)); + } + + /** + * 编码URL,默认使用UTF-8编码
+ * 将需要转换的内容(ASCII码形式之外的内容),用十六进制表示法转换出来,并在之前加上%开头。
+ * 此方法用于POST请求中的请求体自动编码,转义大部分特殊字符 + * + * @param url URL + * @return 编码后的URL + * @throws UtilException UnsupportedEncodingException + * @since 3.1.2 + */ + public static String encodeQuery(String url) throws UtilException { + return encodeQuery(url, CharsetUtil.CHARSET_UTF_8); + } + /** * 编码字符为URL中查询语句
* 将需要转换的内容(ASCII码形式之外的内容),用十六进制表示法转换出来,并在之前加上%开头。
@@ -407,23 +426,6 @@ public class URLUtil { return URLEncoder.QUERY.encode(url, charset); } - /** - * 编码URL字符为 application/x-www-form-urlencoded
- * 将需要转换的内容(ASCII码形式之外的内容),用十六进制表示法转换出来,并在之前加上%开头。
- * 此方法用于URL自动编码,类似于浏览器中键入地址自动编码,对于像类似于“/”的字符不再编码 - * - * @param url URL - * @param charset 编码 - * @return 编码后的URL - * @throws UtilException UnsupportedEncodingException - */ - public static String encode(String url, String charset) throws UtilException { - if (StrUtil.isEmpty(url)) { - return url; - } - return encode(url, StrUtil.isBlank(charset) ? CharsetUtil.defaultCharset() : CharsetUtil.charset(charset)); - } - /** * 编码URL
* 将需要转换的内容(ASCII码形式之外的内容),用十六进制表示法转换出来,并在之前加上%开头。
@@ -433,11 +435,61 @@ public class URLUtil { * @param charset 编码 * @return 编码后的URL * @throws UtilException UnsupportedEncodingException + * @deprecated 请使用 {@link #encodeQuery(String, Charset)} */ + @Deprecated public static String encodeQuery(String url, String charset) throws UtilException { return encodeQuery(url, StrUtil.isBlank(charset) ? CharsetUtil.defaultCharset() : CharsetUtil.charset(charset)); } + /** + * 编码URL,默认使用UTF-8编码
+ * 将需要转换的内容(ASCII码形式之外的内容),用十六进制表示法转换出来,并在之前加上%开头。
+ * 此方法用于URL的Segment中自动编码,转义大部分特殊字符 + * + *
+	 * pchar = unreserved(不处理) / pct-encoded / sub-delims(子分隔符) / "@"
+	 * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
+	 * sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
+	 * 
+ * + * @param url URL + * @return 编码后的URL + * @throws UtilException UnsupportedEncodingException + * @since 5.6.5 + */ + public static String encodePathSegment(String url) throws UtilException { + return encodePathSegment(url, CharsetUtil.CHARSET_UTF_8); + } + + /** + * 编码字符为URL路径中的单个Segment
+ * 将需要转换的内容(ASCII码形式之外的内容),用十六进制表示法转换出来,并在之前加上%开头。
+ * 此方法用于URL的Segment中自动编码,转义大部分特殊字符 + * + *
+	 * pchar = unreserved(不处理) / pct-encoded / sub-delims(子分隔符) / "@"
+	 * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
+	 * sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
+	 * 
+ * + * @param url 被编码内容 + * @param charset 编码 + * @return 编码后的字符 + * @since 5.6.5 + */ + public static String encodePathSegment(String url, Charset charset) { + if (StrUtil.isEmpty(url)) { + return url; + } + if (null == charset) { + charset = CharsetUtil.defaultCharset(); + } + return URLEncoder.PATH_SEGMENT.encode(url, charset); + } + + //-------------------------------------------------------------------------- decode + /** * 解码URL
* 将%开头的16进制表示的内容解码。 diff --git a/hutool-core/src/test/java/cn/hutool/core/net/UrlBuilderTest.java b/hutool-core/src/test/java/cn/hutool/core/net/UrlBuilderTest.java index 917910870..d5de9e859 100644 --- a/hutool-core/src/test/java/cn/hutool/core/net/UrlBuilderTest.java +++ b/hutool-core/src/test/java/cn/hutool/core/net/UrlBuilderTest.java @@ -250,4 +250,13 @@ public class UrlBuilderTest { final UrlBuilder urlBuilder = UrlBuilder.ofHttp(urlStr, CharsetUtil.CHARSET_UTF_8); Assert.assertEquals(urlStr, urlBuilder.toString()); } + + @Test + public void gimg2Test(){ + String url = "https://gimg2.baidu.com/image_search/src=http%3A%2F%2Fpic.jj20.com%2Fup%2Fallimg%2F1114%2F0H320120Z3%2F200H3120Z3-6-1200.jpg&refer=http%3A%2F%2Fpic.jj20.com&app=2002&size=f9999,10000&q=a80&n=0&g=0n&fmt=jpeg?sec=1621996490&t=8c384c2823ea453da15a1b9cd5183eea"; + final UrlBuilder urlBuilder = UrlBuilder.of(url); + + + Assert.assertEquals(url, urlBuilder.toString()); + } } diff --git a/hutool-http/src/test/java/cn/hutool/http/HttpUtilTest.java b/hutool-http/src/test/java/cn/hutool/http/HttpUtilTest.java index 12ccdcdcb..0152a33fe 100644 --- a/hutool-http/src/test/java/cn/hutool/http/HttpUtilTest.java +++ b/hutool-http/src/test/java/cn/hutool/http/HttpUtilTest.java @@ -1,5 +1,6 @@ package cn.hutool.http; +import cn.hutool.core.codec.Base64; import cn.hutool.core.io.FileUtil; import cn.hutool.core.lang.Console; import cn.hutool.core.util.CharsetUtil; @@ -325,4 +326,11 @@ public class HttpUtilTest { final String s = HttpUtil.get("http://hq.sinajs.cn/list=sh600519"); Console.log(s); } + + @Test + @Ignore + public void gimg2Test(){ + byte[] bytes = HttpUtil.downloadBytes("https://gimg2.baidu.com/image_search/src=http%3A%2F%2Fpic.jj20.com%2Fup%2Fallimg%2F1114%2F0H320120Z3%2F200H3120Z3-6-1200.jpg&refer=http%3A%2F%2Fpic.jj20.com&app=2002&size=f9999,10000&q=a80&n=0&g=0n&fmt=jpeg?sec=1621996490&t=8c384c2823ea453da15a1b9cd5183eea"); + Console.log(Base64.encode(bytes)); + } }