mirror of
https://gitee.com/chinabugotech/hutool.git
synced 2025-07-21 15:09:48 +08:00
fix code
This commit is contained in:
@@ -22,6 +22,12 @@
|
||||
<artifactId>hutool-core</artifactId>
|
||||
<version>${project.parent.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.httpcomponents.client5</groupId>
|
||||
<artifactId>httpclient5</artifactId>
|
||||
<version>5.1.3</version>
|
||||
<scope>provided</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>javax.xml.soap</groupId>
|
||||
<artifactId>javax.xml.soap-api</artifactId>
|
||||
|
@@ -1,9 +1,11 @@
|
||||
package cn.hutool.http;
|
||||
|
||||
import cn.hutool.core.compress.InflaterInputStream;
|
||||
import cn.hutool.core.map.CaseInsensitiveMap;
|
||||
|
||||
import java.io.InputStream;
|
||||
import java.util.Map;
|
||||
import java.util.zip.GZIPInputStream;
|
||||
|
||||
/**
|
||||
* 全局响应内容压缩解压器注册中心<br>
|
||||
@@ -23,7 +25,12 @@ public enum GlobalCompressStreamRegister {
|
||||
*/
|
||||
private final Map<String, Class<? extends InputStream>> compressMap = new CaseInsensitiveMap<>();
|
||||
|
||||
/**
|
||||
* Constructor: registers the default decompression stream classes in compressMap.
|
||||
*/
|
||||
GlobalCompressStreamRegister() {
|
||||
// Register GZIPInputStream under the encoding name "gzip".
compressMap.put("gzip", GZIPInputStream.class);
|
||||
// Register InflaterInputStream under the encoding name "deflate".
// NOTE(review): per the imports shown for this file this is cn.hutool.core.compress.InflaterInputStream,
// not java.util.zip.InflaterInputStream — confirm.
compressMap.put("deflate", InflaterInputStream.class);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@@ -7,19 +7,17 @@ import java.io.ByteArrayInputStream;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.zip.GZIPInputStream;
|
||||
import java.util.zip.Inflater;
|
||||
import java.util.zip.InflaterInputStream;
|
||||
|
||||
/**
|
||||
* HTTP输入流,此流用于包装Http请求响应内容的流,用于解析各种压缩、分段的响应流内容
|
||||
*
|
||||
* @author Looly
|
||||
*
|
||||
*/
|
||||
public class HttpInputStream extends InputStream {
|
||||
|
||||
/** 原始流 */
|
||||
/**
|
||||
* 原始流
|
||||
*/
|
||||
private InputStream in;
|
||||
|
||||
/**
|
||||
@@ -89,30 +87,17 @@ public class HttpInputStream extends InputStream {
|
||||
|
||||
// 在一些情况下,返回的流为null,此时提供状态码说明
|
||||
if (null == this.in) {
|
||||
this.in = new ByteArrayInputStream(StrUtil.format("Error request, response status: {}", response.status).getBytes());
|
||||
this.in = new ByteArrayInputStream(StrUtil.format("Error request, null response with status: {}", response.status).getBytes());
|
||||
return;
|
||||
}
|
||||
|
||||
final String contentEncoding = response.contentEncoding();
|
||||
if (StrUtil.equalsIgnoreCase("gzip", contentEncoding) && false == (response.in instanceof GZIPInputStream)) {
|
||||
// Accept-Encoding: gzip
|
||||
final Class<? extends InputStream> streamClass = GlobalCompressStreamRegister.INSTANCE.get(contentEncoding);
|
||||
if (null != streamClass) {
|
||||
try {
|
||||
this.in = new GZIPInputStream(this.in);
|
||||
} catch (final IOException ignore) {
|
||||
// 在类似于Head等方法中无body返回,此时GZIPInputStream构造会出现错误,在此忽略此错误读取普通数据
|
||||
// ignore
|
||||
}
|
||||
} else if (StrUtil.equalsIgnoreCase("deflate", contentEncoding) && false == (this.in instanceof InflaterInputStream)) {
|
||||
// Accept-Encoding: deflate
|
||||
this.in = new InflaterInputStream(this.in, new Inflater(true));
|
||||
} else{
|
||||
final Class<? extends InputStream> streamClass = GlobalCompressStreamRegister.INSTANCE.get(contentEncoding);
|
||||
if(null != streamClass){
|
||||
try {
|
||||
this.in = ConstructorUtil.newInstance(streamClass, this.in);
|
||||
} catch (final Exception ignore) {
|
||||
// 对于构造错误的压缩算法,跳过之
|
||||
}
|
||||
this.in = ConstructorUtil.newInstance(streamClass, this.in);
|
||||
} catch (final Exception ignore) {
|
||||
// 对于构造错误的压缩算法,跳过之
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -1,4 +1,4 @@
|
||||
package cn.hutool.http;
|
||||
package cn.hutool.http.html;
|
||||
|
||||
import cn.hutool.core.lang.Console;
|
||||
import cn.hutool.core.map.SafeConcurrentHashMap;
|
@@ -1,10 +1,12 @@
|
||||
package cn.hutool.http;
|
||||
package cn.hutool.http.html;
|
||||
|
||||
import cn.hutool.core.regex.ReUtil;
|
||||
import cn.hutool.core.text.StrUtil;
|
||||
import cn.hutool.core.text.escape.EscapeUtil;
|
||||
import cn.hutool.core.util.XmlUtil;
|
||||
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
/**
|
||||
* HTML工具类
|
||||
*
|
||||
@@ -13,18 +15,23 @@ import cn.hutool.core.util.XmlUtil;
|
||||
* 比如去掉指定标签(例如广告栏等)、去除JS、去掉样式等等,这些操作都可以使用此工具类完成。
|
||||
*
|
||||
* @author xiaoleilu
|
||||
*
|
||||
*/
|
||||
public class HtmlUtil {
|
||||
|
||||
// NOTE(review): RE_HTML_MARK and RE_SCRIPT are each declared twice below (first as String, then as Pattern).
// The String declarations look like the pre-change lines of this diff — confirm which ones survive.
public static final String RE_HTML_MARK = "(<[^<]*?>)|(<\\s*?/[^<]*?>)|(<[^<]*?/\\s*?>)";
|
||||
public static final String RE_SCRIPT = "<[\\s]*?script[^>]*?>.*?<[\\s]*?\\/[\\s]*?script[\\s]*?>";
|
||||
/**
|
||||
* Regex for HTML tags (opening, closing and self-closing forms), compiled case-insensitively.
|
||||
*/
|
||||
public static final Pattern RE_HTML_MARK = Pattern.compile("(<[^<]*?>)|(<\\s*?/[^<]*?>)|(<[^<]*?/\\s*?>)", Pattern.CASE_INSENSITIVE);
|
||||
/**
|
||||
* Regex for a script element: the opening tag, its content and the closing tag, compiled case-insensitively.
|
||||
* NOTE(review): Pattern.DOTALL is not set, so '.' does not match line terminators; script bodies that span
|
||||
* several lines are presumably not matched — confirm whether that is intended.
|
||||
*/
|
||||
public static final Pattern RE_SCRIPT = Pattern.compile("<[\\s]*?script[^>]*?>.*?<[\\s]*?\\/[\\s]*?script[\\s]*?>", Pattern.CASE_INSENSITIVE);
|
||||
|
||||
private static final char[][] TEXT = new char[64][];
|
||||
|
||||
static {
|
||||
for (int i = 0; i < 64; i++) {
|
||||
TEXT[i] = new char[] { (char) i };
|
||||
TEXT[i] = new char[]{(char) i};
|
||||
}
|
||||
|
||||
// special HTML characters
|
||||
@@ -75,14 +82,24 @@ public class HtmlUtil {
|
||||
* @return 清除标签后的文本
|
||||
*/
|
||||
public static String cleanHtmlTag(final String content) {
|
||||
// NOTE(review): two return statements are shown in this body. This first one (String.replaceAll)
// looks like the pre-change line of the diff — confirm that only the ReUtil variant remains.
return content.replaceAll(RE_HTML_MARK, "");
|
||||
// Replace every match of RE_HTML_MARK in content with the empty string.
return ReUtil.replaceAll(content, RE_HTML_MARK, "");
|
||||
}
|
||||
|
||||
/**
|
||||
* Removes all script tags, including their content.
|
||||
*
|
||||
* @param content the text to process
|
||||
* @return the text after the script tags have been removed
|
||||
*/
|
||||
public static String removeScriptTag(final String content) {
|
||||
// Replace every match of RE_SCRIPT in content with the empty string.
return ReUtil.replaceAll(content, RE_SCRIPT, "");
|
||||
}
|
||||
|
||||
/**
|
||||
* 清除指定HTML标签和被标签包围的内容<br>
|
||||
* 不区分大小写
|
||||
*
|
||||
* @param content 文本
|
||||
* @param content 文本
|
||||
* @param tagNames 要清除的标签
|
||||
* @return 去除标签后的文本
|
||||
*/
|
||||
@@ -94,7 +111,7 @@ public class HtmlUtil {
|
||||
* 清除指定HTML标签,不包括内容<br>
|
||||
* 不区分大小写
|
||||
*
|
||||
* @param content 文本
|
||||
* @param content 文本
|
||||
* @param tagNames 要清除的标签
|
||||
* @return 去除标签后的文本
|
||||
*/
|
||||
@@ -106,9 +123,9 @@ public class HtmlUtil {
|
||||
* 清除指定HTML标签<br>
|
||||
* 不区分大小写
|
||||
*
|
||||
* @param content 文本
|
||||
* @param content 文本
|
||||
* @param withTagContent 是否去掉被包含在标签中的内容
|
||||
* @param tagNames 要清除的标签
|
||||
* @param tagNames 要清除的标签
|
||||
* @return 去除标签后的文本
|
||||
*/
|
||||
public static String removeHtmlTag(String content, final boolean withTagContent, final String... tagNames) {
|
||||
@@ -136,7 +153,7 @@ public class HtmlUtil {
|
||||
* 去除HTML标签中的属性,如果多个标签有相同属性,都去除
|
||||
*
|
||||
* @param content 文本
|
||||
* @param attrs 属性名(不区分大小写)
|
||||
* @param attrs 属性名(不区分大小写)
|
||||
* @return 处理后的文本
|
||||
*/
|
||||
public static String removeHtmlAttr(String content, final String... attrs) {
|
||||
@@ -156,7 +173,7 @@ public class HtmlUtil {
|
||||
/**
|
||||
* 去除指定标签的所有属性
|
||||
*
|
||||
* @param content 内容
|
||||
* @param content 内容
|
||||
* @param tagNames 指定标签
|
||||
* @return 处理后的文本
|
||||
*/
|
6
hutool-http/src/main/java/cn/hutool/http/html/package-info.java
Executable file
6
hutool-http/src/main/java/cn/hutool/http/html/package-info.java
Executable file
@@ -0,0 +1,6 @@
|
||||
/**
|
||||
* HTML相关工具封装
|
||||
*
|
||||
* @author looly
|
||||
*/
|
||||
package cn.hutool.http.html;
|
@@ -9,6 +9,7 @@ import cn.hutool.http.server.action.RootAction;
|
||||
import cn.hutool.http.server.filter.HttpFilter;
|
||||
import cn.hutool.http.server.filter.SimpleFilter;
|
||||
import cn.hutool.http.server.handler.ActionHandler;
|
||||
|
||||
import com.sun.net.httpserver.Filter;
|
||||
import com.sun.net.httpserver.HttpContext;
|
||||
import com.sun.net.httpserver.HttpExchange;
|
||||
|
@@ -1,5 +1,6 @@
|
||||
package cn.hutool.http;
|
||||
|
||||
import cn.hutool.http.html.HtmlUtil;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
|
@@ -115,9 +115,11 @@ public class HttpRequestTest {
|
||||
// Manual test (marked @Ignore) that requests a remote XML resource and logs the response.
@Test
|
||||
@Ignore
|
||||
public void getDeflateTest() {
|
||||
// NOTE(review): `res` is declared twice in this body (String, then HttpResponse). The String variant
// and its Console.log call look like the pre-change lines of the diff — confirm.
final String res = HttpRequest.get("https://comment.bilibili.com/67573272.xml")
|
||||
.execute().body();
|
||||
Console.log(res);
|
||||
// Send the request with an Accept-Encoding header of "deflate".
final HttpResponse res = HttpRequest.get("https://comment.bilibili.com/67573272.xml")
|
||||
.header(Header.ACCEPT_ENCODING, "deflate")
|
||||
.execute();
|
||||
// Log the Content-Encoding response header, then the response body.
Console.log(res.header(Header.CONTENT_ENCODING));
|
||||
Console.log(res.body());
|
||||
}
|
||||
|
||||
@Test
|
||||
|
Reference in New Issue
Block a user