From c50625e21561dd901e818a08fe0e0a4ccf66d0f9 Mon Sep 17 00:00:00 2001 From: Looly Date: Sun, 24 Nov 2024 00:18:33 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E5=8F=8C=E5=BC=95=E5=8F=B7?= =?UTF-8?q?=E8=BD=AC=E4=B9=89=E7=AC=A6=E8=BD=AC=E4=B9=89=E9=94=99=E8=AF=AF?= =?UTF-8?q?=E9=97=AE=E9=A2=98=EF=BC=8C=E4=BF=AE=E6=94=B9=E8=A7=84=E5=88=99?= =?UTF-8?q?=E5=90=8E=EF=BC=8C=E5=AF=B9=E9=9D=9E=E9=97=AD=E5=90=88=E5=8F=8C?= =?UTF-8?q?=E5=BC=95=E5=8F=B7=E5=AD=97=E6=AE=B5=E7=9A=84=E7=AD=96=E7=95=A5?= =?UTF-8?q?=E5=8F=98=E6=9B=B4=EF=BC=8C=E5=A6=82"aa=EF=BC=8C=E5=88=99?= =?UTF-8?q?=E8=A2=AB=E8=AF=86=E5=88=AB=E4=B8=BAaa=E4=BF=AE=E5=A4=8D?= =?UTF-8?q?=E5=8F=8C=E5=BC=95=E5=8F=B7=E8=BD=AC=E4=B9=89=E7=AC=A6=E8=BD=AC?= =?UTF-8?q?=E4=B9=89=E9=94=99=E8=AF=AF=E9=97=AE=E9=A2=98=EF=BC=8C=E4=BF=AE?= =?UTF-8?q?=E6=94=B9=E8=A7=84=E5=88=99=E5=90=8E=EF=BC=8C=E5=AF=B9=E9=9D=9E?= =?UTF-8?q?=E9=97=AD=E5=90=88=E5=8F=8C=E5=BC=95=E5=8F=B7=E5=AD=97=E6=AE=B5?= =?UTF-8?q?=E7=9A=84=E7=AD=96=E7=95=A5=E5=8F=98=E6=9B=B4=EF=BC=8C=E5=A6=82?= =?UTF-8?q?"aa=EF=BC=8C=E5=88=99=E8=A2=AB=E8=AF=86=E5=88=AB=E4=B8=BAaa?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../dromara/hutool/poi/csv/CsvBaseReader.java | 2 +- .../org/dromara/hutool/poi/csv/CsvParser.java | 188 ++------- .../dromara/hutool/poi/csv/CsvParser2.java | 362 ------------------ .../dromara/hutool/poi/csv/CsvReadConfig.java | 2 +- .../dromara/hutool/poi/csv/CsvTokener.java | 29 +- .../hutool/poi/excel/sax/ExcelSaxUtil.java | 4 +- .../dromara/hutool/poi/csv/CsvParserTest.java | 40 +- .../dromara/hutool/poi/csv/CsvReaderTest.java | 12 +- .../dromara/hutool/poi/csv/CsvUtilTest.java | 32 +- 9 files changed, 124 insertions(+), 547 deletions(-) delete mode 100644 hutool-poi/src/main/java/org/dromara/hutool/poi/csv/CsvParser2.java diff --git a/hutool-poi/src/main/java/org/dromara/hutool/poi/csv/CsvBaseReader.java b/hutool-poi/src/main/java/org/dromara/hutool/poi/csv/CsvBaseReader.java index 
f59287d9a..163dc73fc 100644 --- a/hutool-poi/src/main/java/org/dromara/hutool/poi/csv/CsvBaseReader.java +++ b/hutool-poi/src/main/java/org/dromara/hutool/poi/csv/CsvBaseReader.java @@ -66,7 +66,7 @@ public class CsvBaseReader implements Serializable { * @param config 配置项 */ public CsvBaseReader(final CsvReadConfig config) { - this.config = ObjUtil.defaultIfNull(config, CsvReadConfig::defaultConfig); + this.config = ObjUtil.defaultIfNull(config, CsvReadConfig::of); } //--------------------------------------------------------------------------------------------- Constructor end diff --git a/hutool-poi/src/main/java/org/dromara/hutool/poi/csv/CsvParser.java b/hutool-poi/src/main/java/org/dromara/hutool/poi/csv/CsvParser.java index c410469a7..e7a73c3ed 100644 --- a/hutool-poi/src/main/java/org/dromara/hutool/poi/csv/CsvParser.java +++ b/hutool-poi/src/main/java/org/dromara/hutool/poi/csv/CsvParser.java @@ -18,11 +18,10 @@ package org.dromara.hutool.poi.csv; import org.dromara.hutool.core.collection.iter.ComputeIter; import org.dromara.hutool.core.io.IORuntimeException; -import org.dromara.hutool.core.io.IoUtil; import org.dromara.hutool.core.map.MapUtil; +import org.dromara.hutool.core.text.CharUtil; import org.dromara.hutool.core.text.StrTrimer; import org.dromara.hutool.core.text.StrUtil; -import org.dromara.hutool.core.text.CharUtil; import org.dromara.hutool.core.util.ObjUtil; import java.io.Closeable; @@ -41,10 +40,8 @@ public final class CsvParser extends ComputeIter implements Closeable, S private static final int DEFAULT_ROW_CAPACITY = 10; - private final Reader reader; private final CsvReadConfig config; - - private final Buffer buf; + private final CsvTokener tokener; /** * 前一个特殊分界字符 */ @@ -90,20 +87,8 @@ public final class CsvParser extends ComputeIter implements Closeable, S * @param config 配置,null则为默认配置 */ public CsvParser(final Reader reader, final CsvReadConfig config) { - this(reader, config, IoUtil.DEFAULT_LARGE_BUFFER_SIZE); - } - - /** - * CSV解析器 - 
* - * @param reader Reader - * @param config 配置,null则为默认配置 - * @param bufferSize 默认缓存大小 - */ - public CsvParser(final Reader reader, final CsvReadConfig config, final int bufferSize) { - this.reader = Objects.requireNonNull(reader, "reader must not be null"); - this.config = ObjUtil.defaultIfNull(config, CsvReadConfig::defaultConfig); - this.buf = new Buffer(bufferSize); + this.config = ObjUtil.defaultIfNull(config, CsvReadConfig::of); + this.tokener = new CsvTokener(reader); } /** @@ -130,7 +115,7 @@ public final class CsvParser extends ComputeIter implements Closeable, S /** * 读取下一行数据 * - * @return CsvRow + * @return CsvRow,{@code null}表示读取结束 * @throws IORuntimeException IO读取异常 */ public CsvRow nextRow() throws IORuntimeException { @@ -230,36 +215,28 @@ public final class CsvParser extends ComputeIter implements Closeable, S final List currentFields = new ArrayList<>(maxFieldCount > 0 ? maxFieldCount : DEFAULT_ROW_CAPACITY); final StringBuilder currentField = this.currentField; - final Buffer buf = this.buf; int preChar = this.preChar;//前一个特殊分界字符 - int copyLen = 0; //拷贝长度 boolean inComment = false; + int c; while (true) { - if (!buf.hasRemaining()) { - // 此Buffer读取结束,开始读取下一段 - if (copyLen > 0) { - buf.appendTo(currentField, copyLen); - // 此处无需mark,read方法会重置mark - } - if (buf.read(this.reader) < 0) { - // CSV读取结束 - finished = true; - - if (currentField.length() > 0 || preChar == config.fieldSeparator) { - //剩余部分作为一个字段 - addField(currentFields, currentField.toString()); - currentField.setLength(0); + c = tokener.next(); + if(c < 0){ + if (currentField.length() > 0 || preChar == config.fieldSeparator) { + if(this.inQuotes){ + // 未闭合的文本包装,在末尾补充包装符 + currentField.append(config.textDelimiter); } - break; + + //剩余部分作为一个字段 + addField(currentFields, currentField.toString()); + currentField.setLength(0); } - - //重置 - copyLen = 0; + // 读取结束 + this.finished = true; + break; } - final char c = buf.get(); - // 注释行标记 if (preChar < 0 || preChar == CharUtil.CR || preChar == 
CharUtil.LF) { // 判断行首字符为指定注释字符的注释开始,直到遇到换行符 @@ -277,16 +254,20 @@ public final class CsvParser extends ComputeIter implements Closeable, S inComment = false; } // 跳过注释行中的任何字符 - buf.mark(); - preChar = c; continue; } if (inQuotes) { //引号内,作为内容,直到引号结束 if (c == config.textDelimiter) { - // End of quoted text - inQuotes = false; + // issue#IB5UQ8 文本包装符转义 + final int next = tokener.next(); + if(next != config.textDelimiter){ + // 包装结束 + inQuotes = false; + tokener.back(); + } + // https://datatracker.ietf.org/doc/html/rfc4180#section-2 跳过转义符,只保留被转义的包装符 } else { // 字段内容中新行 if (isLineEnd(c, preChar)) { @@ -294,28 +275,19 @@ public final class CsvParser extends ComputeIter implements Closeable, S } } // 普通字段字符 - copyLen++; + currentField.append((char)c); } else { // 非引号内 if (c == config.fieldSeparator) { //一个字段结束 - if (copyLen > 0) { - buf.appendTo(currentField, copyLen); - copyLen = 0; - } - buf.mark(); addField(currentFields, currentField.toString()); currentField.setLength(0); } else if (c == config.textDelimiter && isFieldBegin(preChar)) { // 引号开始且出现在字段开头 inQuotes = true; - copyLen++; + currentField.append((char)c); } else if (c == CharUtil.CR) { - // \r,直接结束 - if (copyLen > 0) { - buf.appendTo(currentField, copyLen); - } - buf.mark(); + // \r addField(currentFields, currentField.toString()); currentField.setLength(0); preChar = c; @@ -323,20 +295,14 @@ public final class CsvParser extends ComputeIter implements Closeable, S } else if (c == CharUtil.LF) { // \n if (preChar != CharUtil.CR) { - if (copyLen > 0) { - buf.appendTo(currentField, copyLen); - } - buf.mark(); addField(currentFields, currentField.toString()); currentField.setLength(0); preChar = c; break; } // 前一个字符是\r,已经处理过这个字段了,此处直接跳过 - buf.mark(); } else { - // 普通字符 - copyLen++; + currentField.append((char)c); } } @@ -352,7 +318,7 @@ public final class CsvParser extends ComputeIter implements Closeable, S @Override public void close() throws IOException { - reader.close(); + tokener.close(); } /** @@ -369,9 
+335,6 @@ public final class CsvParser extends ComputeIter implements Closeable, S if(StrUtil.isWrap(field, textDelimiter)){ field = StrUtil.sub(field, 1, field.length() - 1); - // https://datatracker.ietf.org/doc/html/rfc4180#section-2 - // 第七条规则,只有包装内的包装符需要转义 - field = StrUtil.replace(field, String.valueOf(textDelimiter) + textDelimiter, String.valueOf(textDelimiter)); } if (this.config.trimField) { // issue#I49M0C@Gitee @@ -388,7 +351,7 @@ public final class CsvParser extends ComputeIter implements Closeable, S * @return 是否结束 * @since 5.7.4 */ - private boolean isLineEnd(final char c, final int preChar) { + private boolean isLineEnd(final int c, final int preChar) { return (c == CharUtil.CR || c == CharUtil.LF) && preChar != CharUtil.CR; } @@ -409,89 +372,4 @@ public final class CsvParser extends ComputeIter implements Closeable, S || preChar == CharUtil.LF || preChar == CharUtil.CR; } - - /** - * 内部Buffer - * - * @author looly - */ - private static class Buffer implements Serializable { - private static final long serialVersionUID = 1L; - - final char[] buf; - - /** - * 标记位置,用于读数据 - */ - private int mark; - /** - * 当前位置 - */ - private int position; - /** - * 读取的数据长度,一般小于buf.length,-1表示无数据 - */ - private int limit; - - Buffer(final int capacity) { - buf = new char[capacity]; - } - - /** - * 是否还有未读数据 - * - * @return 是否还有未读数据 - */ - public final boolean hasRemaining() { - return position < limit; - } - - /** - * 读取到缓存
- * 全量读取,会重置Buffer中所有数据 - * - * @param reader {@link Reader} - */ - int read(final Reader reader) { - final int length; - try { - length = reader.read(this.buf); - } catch (final IOException e) { - throw new IORuntimeException(e); - } - this.mark = 0; - this.position = 0; - this.limit = length; - return length; - } - - /** - * 先获取当前字符,再将当前位置后移一位
- * 此方法不检查是否到了数组末尾,请自行使用{@link #hasRemaining()}判断。 - * - * @return 当前位置字符 - * @see #hasRemaining() - */ - char get() { - return this.buf[this.position++]; - } - - /** - * 标记位置记为下次读取位置 - */ - void mark() { - this.mark = this.position; - } - - /** - * 将数据追加到{@link StringBuilder},追加结束后需手动调用{@link #mark()} 重置读取位置 - * - * @param builder {@link StringBuilder} - * @param length 追加的长度 - * @see #mark() - */ - void appendTo(final StringBuilder builder, final int length) { - builder.append(this.buf, this.mark, length); - } - } } diff --git a/hutool-poi/src/main/java/org/dromara/hutool/poi/csv/CsvParser2.java b/hutool-poi/src/main/java/org/dromara/hutool/poi/csv/CsvParser2.java deleted file mode 100644 index fc9ab4f72..000000000 --- a/hutool-poi/src/main/java/org/dromara/hutool/poi/csv/CsvParser2.java +++ /dev/null @@ -1,362 +0,0 @@ -/* - * Copyright (c) 2013-2024 Hutool Team and hutool.cn - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.dromara.hutool.poi.csv; - -import org.dromara.hutool.core.collection.iter.ComputeIter; -import org.dromara.hutool.core.io.IORuntimeException; -import org.dromara.hutool.core.map.MapUtil; -import org.dromara.hutool.core.text.CharUtil; -import org.dromara.hutool.core.text.StrTrimer; -import org.dromara.hutool.core.text.StrUtil; -import org.dromara.hutool.core.util.ObjUtil; - -import java.io.Closeable; -import java.io.IOException; -import java.io.Reader; -import java.io.Serializable; -import java.util.*; - -/** - * CSV行解析器,参考:FastCSV - * - * @author Looly - */ -public final class CsvParser2 extends ComputeIter implements Closeable, Serializable { - private static final long serialVersionUID = 1L; - - private static final int DEFAULT_ROW_CAPACITY = 10; - - private final CsvReadConfig config; - private final CsvTokener tokener; - /** - * 前一个特殊分界字符 - */ - private int preChar = -1; - /** - * 是否在引号包装内 - */ - private boolean inQuotes; - /** - * 当前读取字段 - */ - private final StringBuilder currentField = new StringBuilder(512); - - /** - * 标题行 - */ - private CsvRow header; - /** - * 当前行号 - */ - private long lineNo = -1; - /** - * 引号内的行数 - */ - private long inQuotesLineCount; - /** - * 第一行字段数,用于检查每行字段数是否一致 - */ - private int firstLineFieldCount = -1; - /** - * 最大字段数量,用于初始化行,减少扩容 - */ - private int maxFieldCount; - /** - * 是否读取结束 - */ - private boolean finished; - - /** - * CSV解析器 - * - * @param reader Reader - * @param config 配置,null则为默认配置 - */ - public CsvParser2(final Reader reader, final CsvReadConfig config) { - this.config = ObjUtil.defaultIfNull(config, CsvReadConfig::defaultConfig); - this.tokener = new CsvTokener(reader); - } - - /** - * 获取头部字段列表,如果headerLineNo < 0,抛出异常 - * - * @return 头部列表 - * @throws IllegalStateException 如果不解析头部或者没有调用nextRow()方法 - */ - public List getHeader() { - if (config.headerLineNo < 0) { - throw new IllegalStateException("No header available - header parsing is disabled"); - } - if (lineNo < config.beginLineNo) { - throw new 
IllegalStateException("No header available - call nextRow() first"); - } - return header.getRaw(); - } - - @Override - protected CsvRow computeNext() { - return nextRow(); - } - - /** - * 读取下一行数据 - * - * @return CsvRow - * @throws IORuntimeException IO读取异常 - */ - public CsvRow nextRow() throws IORuntimeException { - List currentFields; - int fieldCount; - while (!finished) { - currentFields = readLine(); - fieldCount = currentFields.size(); - if (fieldCount < 1) { - // 空List表示读取结束 - break; - } - - // 读取范围校验 - if (lineNo < config.beginLineNo) { - // 未达到读取起始行,继续 - continue; - } - if (lineNo > config.endLineNo) { - // 超出结束行,读取结束 - break; - } - - // 跳过空行 - if (config.skipEmptyRows && fieldCount == 1 && currentFields.get(0).isEmpty()) { - // [""]表示空行 - continue; - } - - // 检查每行的字段数是否一致 - if (config.errorOnDifferentFieldCount) { - if (firstLineFieldCount < 0) { - firstLineFieldCount = fieldCount; - } else if (fieldCount != firstLineFieldCount) { - throw new IORuntimeException(String.format("Line %d has %d fields, but first line has %d fields", lineNo, fieldCount, firstLineFieldCount)); - } - } - - // 记录最大字段数 - if (fieldCount > maxFieldCount) { - maxFieldCount = fieldCount; - } - - //初始化标题 - if (lineNo == config.headerLineNo && null == header) { - initHeader(currentFields); - // 作为标题行后,此行跳过,下一行做为第一行 - continue; - } - - return new CsvRow(lineNo, null == header ? 
null : header.headerMap, currentFields); - } - - return null; - } - - /** - * 当前行做为标题行 - * - * @param currentFields 当前行字段列表 - */ - private void initHeader(final List currentFields) { - final Map localHeaderMap = new LinkedHashMap<>(currentFields.size()); - for (int i = 0; i < currentFields.size(); i++) { - String field = currentFields.get(i); - if (MapUtil.isNotEmpty(this.config.headerAlias)) { - // 自定义别名 - field = ObjUtil.defaultIfNull(this.config.headerAlias.get(field), field); - } - if (StrUtil.isNotEmpty(field) && !localHeaderMap.containsKey(field)) { - localHeaderMap.put(field, i); - } - } - - header = new CsvRow(this.lineNo, Collections.unmodifiableMap(localHeaderMap), Collections.unmodifiableList(currentFields)); - } - - /** - * 读取一行数据,如果读取结束,返回size为0的List
- * 空行是size为1的List,唯一元素是"" - * - *

- * 行号要考虑注释行和引号包装的内容中的换行 - *

- * - * @return 一行数据 - * @throws IORuntimeException IO异常 - */ - private List readLine() throws IORuntimeException { - // 矫正行号 - // 当一行内容包含多行数据时,记录首行行号,但是读取下一行时,需要把多行内容的行数加上 - if (inQuotesLineCount > 0) { - this.lineNo += this.inQuotesLineCount; - this.inQuotesLineCount = 0; - } - - final List currentFields = new ArrayList<>(maxFieldCount > 0 ? maxFieldCount : DEFAULT_ROW_CAPACITY); - - final StringBuilder currentField = this.currentField; - int preChar = this.preChar;//前一个特殊分界字符 - boolean inComment = false; - - int c; - while (true) { - c = tokener.next(); - if(c < 0){ - // 读取结束 - this.finished = true; - break; - } - - // 注释行标记 - if (preChar < 0 || preChar == CharUtil.CR || preChar == CharUtil.LF) { - // 判断行首字符为指定注释字符的注释开始,直到遇到换行符 - // 行首分两种,1是preChar < 0表示文本开始,2是换行符后紧跟就是下一行的开始 - // issue#IA8WE0 如果注释符出现在包装符内,被认为是普通字符 - if (!inQuotes && null != this.config.commentCharacter && c == this.config.commentCharacter) { - inComment = true; - } - } - // 注释行处理 - if (inComment) { - if (c == CharUtil.CR || c == CharUtil.LF) { - // 注释行以换行符为结尾 - lineNo++; - inComment = false; - } - // 跳过注释行中的任何字符 - continue; - } - - if (inQuotes) { - //引号内,作为内容,直到引号结束 - if (c == config.textDelimiter) { - // End of quoted text - inQuotes = false; - } else { - // 字段内容中新行 - if (isLineEnd(c, preChar)) { - inQuotesLineCount++; - } - } - // 普通字段字符 - currentField.append((char)c); - } else { - // 非引号内 - if (c == config.fieldSeparator) { - //一个字段结束 - addField(currentFields, currentField.toString()); - currentField.setLength(0); - } else if (c == config.textDelimiter && isFieldBegin(preChar)) { - // 引号开始且出现在字段开头 - inQuotes = true; - currentField.append((char)c); - } else if (c == CharUtil.CR) { - // \r - addField(currentFields, currentField.toString()); - currentField.setLength(0); - preChar = c; - break; - } else if (c == CharUtil.LF) { - // \n - if (preChar != CharUtil.CR) { - addField(currentFields, currentField.toString()); - currentField.setLength(0); - preChar = c; - break; - } - // 
前一个字符是\r,已经处理过这个字段了,此处直接跳过 - } else { - currentField.append((char)c); - } - } - - preChar = c; - } - - // restore fields - this.preChar = preChar; - - lineNo++; - return currentFields; - } - - @Override - public void close() throws IOException { - tokener.close(); - } - - /** - * 将字段加入字段列表并自动去包装和去转义 - * - * @param currentFields 当前的字段列表(即为行) - * @param field 字段 - */ - private void addField(final List currentFields, String field) { - final char textDelimiter = this.config.textDelimiter; - - // 忽略多余引号后的换行符 - field = StrUtil.trim(field, StrTrimer.TrimMode.SUFFIX, (c -> c == CharUtil.LF || c == CharUtil.CR)); - - if(StrUtil.isWrap(field, textDelimiter)){ - field = StrUtil.sub(field, 1, field.length() - 1); - // https://datatracker.ietf.org/doc/html/rfc4180#section-2 - // 第七条规则,只有包装内的包装符需要转义 - field = StrUtil.replace(field, String.valueOf(textDelimiter) + textDelimiter, String.valueOf(textDelimiter)); - } - if (this.config.trimField) { - // issue#I49M0C@Gitee - field = StrUtil.trim(field); - } - currentFields.add(field); - } - - /** - * 是否行结束符 - * - * @param c 符号 - * @param preChar 前一个字符 - * @return 是否结束 - * @since 5.7.4 - */ - private boolean isLineEnd(final int c, final int preChar) { - return (c == CharUtil.CR || c == CharUtil.LF) && preChar != CharUtil.CR; - } - - /** - * 通过前一个字符,判断是否字段开始,几种情况: - *
    - *
  • 正文开头,无前字符
  • - *
  • 字段分隔符,即上个字段结束
  • - *
  • 换行符,即新行开始
  • - *
- * - * @param preChar 前字符 - * @return 是否字段开始 - */ - private boolean isFieldBegin(final int preChar) { - return preChar == -1 - || preChar == config.fieldSeparator - || preChar == CharUtil.LF - || preChar == CharUtil.CR; - } -} diff --git a/hutool-poi/src/main/java/org/dromara/hutool/poi/csv/CsvReadConfig.java b/hutool-poi/src/main/java/org/dromara/hutool/poi/csv/CsvReadConfig.java index 7d179ba30..6e74a69e2 100644 --- a/hutool-poi/src/main/java/org/dromara/hutool/poi/csv/CsvReadConfig.java +++ b/hutool-poi/src/main/java/org/dromara/hutool/poi/csv/CsvReadConfig.java @@ -45,7 +45,7 @@ public class CsvReadConfig extends CsvConfig implements Serializa * * @return 默认配置 */ - public static CsvReadConfig defaultConfig() { + public static CsvReadConfig of() { return new CsvReadConfig(); } diff --git a/hutool-poi/src/main/java/org/dromara/hutool/poi/csv/CsvTokener.java b/hutool-poi/src/main/java/org/dromara/hutool/poi/csv/CsvTokener.java index 868a75b29..b54379d76 100644 --- a/hutool-poi/src/main/java/org/dromara/hutool/poi/csv/CsvTokener.java +++ b/hutool-poi/src/main/java/org/dromara/hutool/poi/csv/CsvTokener.java @@ -24,6 +24,12 @@ import java.io.Closeable; import java.io.IOException; import java.io.Reader; +/** + * CSV解析器,用于解析CSV文件 + * + * @author looly + * @since 5.8.0 + */ public class CsvTokener extends SimpleWrapper implements Closeable { /** @@ -45,7 +51,7 @@ public class CsvTokener extends SimpleWrapper implements Closeable { * @param reader {@link Reader} */ public CsvTokener(final Reader reader) { - super(reader); + super(IoUtil.toBuffered(reader)); } /** @@ -56,12 +62,12 @@ public class CsvTokener extends SimpleWrapper implements Closeable { public int next() { if(this.usePrev){ this.usePrev = false; - return this.prev; - } - try { - this.prev = this.raw.read(); - } catch (final IOException e) { - throw new IORuntimeException(e); + }else{ + try { + this.prev = this.raw.read(); + } catch (final IOException e) { + throw new IORuntimeException(e); + } } 
this.index++; return this.prev; @@ -80,6 +86,15 @@ public class CsvTokener extends SimpleWrapper implements Closeable { this.usePrev = true; } + /** + * 获取当前位置 + * + * @return 位置 + */ + public long getIndex() { + return this.index; + } + @Override public void close() throws IOException { IoUtil.nullSafeClose(this.raw); diff --git a/hutool-poi/src/main/java/org/dromara/hutool/poi/excel/sax/ExcelSaxUtil.java b/hutool-poi/src/main/java/org/dromara/hutool/poi/excel/sax/ExcelSaxUtil.java index 2a50fe9d7..f654e9222 100644 --- a/hutool-poi/src/main/java/org/dromara/hutool/poi/excel/sax/ExcelSaxUtil.java +++ b/hutool-poi/src/main/java/org/dromara/hutool/poi/excel/sax/ExcelSaxUtil.java @@ -287,9 +287,9 @@ public class ExcelSaxUtil { return null; } - // issue#IB0EJ9 可能精度丢失 + // issue#IB0EJ9 可能精度丢失,对含有小数的value判断并转为BigDecimal final double number = Double.parseDouble(value); - if(false == value.equals(Double.toString(number))){ + if(StrUtil.contains(value, CharUtil.DOT) && !value.equals(Double.toString(number))){ // 精度丢失 return NumberUtil.toBigDecimal(value); } diff --git a/hutool-poi/src/test/java/org/dromara/hutool/poi/csv/CsvParserTest.java b/hutool-poi/src/test/java/org/dromara/hutool/poi/csv/CsvParserTest.java index 83bab13fb..d64a59a8a 100644 --- a/hutool-poi/src/test/java/org/dromara/hutool/poi/csv/CsvParserTest.java +++ b/hutool-poi/src/test/java/org/dromara/hutool/poi/csv/CsvParserTest.java @@ -17,7 +17,6 @@ package org.dromara.hutool.poi.csv; import org.dromara.hutool.core.io.IoUtil; -import org.dromara.hutool.core.lang.Console; import org.dromara.hutool.core.text.StrUtil; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; @@ -29,7 +28,7 @@ public class CsvParserTest { @Test public void parseTest1() { final StringReader reader = StrUtil.getReader("aaa,b\"bba\",ccc"); - final CsvParser2 parser = new CsvParser2(reader, null); + final CsvParser parser = new CsvParser(reader, null); final CsvRow row = parser.nextRow(); //noinspection 
ConstantConditions Assertions.assertEquals("b\"bba\"", row.getRaw().get(1)); @@ -39,7 +38,7 @@ public class CsvParserTest { @Test public void parseTest2() { final StringReader reader = StrUtil.getReader("aaa,\"bba\"bbb,ccc"); - final CsvParser2 parser = new CsvParser2(reader, null); + final CsvParser parser = new CsvParser(reader, null); final CsvRow row = parser.nextRow(); //noinspection ConstantConditions Assertions.assertEquals("\"bba\"bbb", row.getRaw().get(1)); @@ -49,7 +48,7 @@ public class CsvParserTest { @Test public void parseTest3() { final StringReader reader = StrUtil.getReader("aaa,\"bba\",ccc"); - final CsvParser2 parser = new CsvParser2(reader, null); + final CsvParser parser = new CsvParser(reader, null); final CsvRow row = parser.nextRow(); //noinspection ConstantConditions Assertions.assertEquals("bba", row.getRaw().get(1)); @@ -59,7 +58,7 @@ public class CsvParserTest { @Test public void parseTest4() { final StringReader reader = StrUtil.getReader("aaa,\"\",ccc"); - final CsvParser2 parser = new CsvParser2(reader, null); + final CsvParser parser = new CsvParser(reader, null); final CsvRow row = parser.nextRow(); //noinspection ConstantConditions Assertions.assertEquals("", row.getRaw().get(1)); @@ -80,9 +79,36 @@ public class CsvParserTest { @Test void issueIB5UQ8Test() { - String csv = "\"Consultancy, 10\"\",, food\""; + final String csv = "\"Consultancy, 10\"\",, food\""; final CsvReader reader = CsvUtil.getReader(new StringReader(csv)); final String s = reader.read().getRow(0).get(0); - Console.log(s); + Assertions.assertEquals("Consultancy, 10\",, food", s); + } + + @Test + void textDelimiterAtEndTest() { + final String csv = "\"Consultancy, 10\""; + final CsvReader reader = CsvUtil.getReader(new StringReader(csv)); + final String s = reader.read().getRow(0).get(0); + Assertions.assertEquals("Consultancy, 10", s); + } + + @Test + void textDelimiterUncloseTest() { + // 未闭合的文本包装符,文本结尾自动补充包装符并结束字段 + final String csv = "\"Consultancy,"; + final 
CsvReader reader = CsvUtil.getReader(new StringReader(csv)); + final String s = reader.read().getRow(0).get(0); + Assertions.assertEquals("Consultancy,", s); + } + + @Test + void textDelimiterOfCount3Test() { + // 三个连续的文本包装符:第一个为包装开始,后两个为转义后的包装符,包装未闭合,文本结尾自动补充包装符 + final String csv = "\"\"\""; + final CsvParser csvParser = new CsvParser(new StringReader(csv), CsvReadConfig.of().setSkipEmptyRows(false)); + final CsvRow row = csvParser.nextRow(); + Assertions.assertNotNull(row); + Assertions.assertEquals("\"", row.get(0)); } } diff --git a/hutool-poi/src/test/java/org/dromara/hutool/poi/csv/CsvReaderTest.java b/hutool-poi/src/test/java/org/dromara/hutool/poi/csv/CsvReaderTest.java index e6731dc26..e17c4b929 100644 --- a/hutool-poi/src/test/java/org/dromara/hutool/poi/csv/CsvReaderTest.java +++ b/hutool-poi/src/test/java/org/dromara/hutool/poi/csv/CsvReaderTest.java @@ -68,7 +68,7 @@ public class CsvReaderTest { @Test public void readAliasMapListTest() { - final CsvReadConfig csvReadConfig = CsvReadConfig.defaultConfig(); + final CsvReadConfig csvReadConfig = CsvReadConfig.of(); csvReadConfig.addHeaderAlias("姓名", "name"); final CsvReader reader = CsvUtil.getReader(csvReadConfig); @@ -135,7 +135,7 @@ public class CsvReaderTest { @Test @Disabled public void readTest3() { - final CsvReadConfig csvReadConfig = CsvReadConfig.defaultConfig(); + final CsvReadConfig csvReadConfig = CsvReadConfig.of(); csvReadConfig.setContainsHeader(true); final CsvReader reader = CsvUtil.getReader(csvReadConfig); final CsvData read = reader.read(FileUtil.file("d:/test/ceshi.csv")); @@ -164,7 +164,7 @@ public class CsvReaderTest { @Test public void lineLimitTest() { // 从原始第2行开始读取 - final CsvReader reader = new CsvReader(CsvReadConfig.defaultConfig().setBeginLineNo(2)); + final CsvReader reader = new CsvReader(CsvReadConfig.of().setBeginLineNo(2)); final CsvData data = reader.read( ResourceUtil.getUtf8Reader("test_lines.csv"), true); @@ -183,7 +183,7 @@ public class CsvReaderTest { @Test public void 
lineLimitWithHeaderTest() { // 从原始第2行开始读取 - final CsvReader reader = new CsvReader(CsvReadConfig.defaultConfig().setBeginLineNo(2).setContainsHeader(true)); + final CsvReader reader = new CsvReader(CsvReadConfig.of().setBeginLineNo(2).setContainsHeader(true)); final CsvData data = reader.read( ResourceUtil.getUtf8Reader("test_lines.csv"), true); @@ -199,7 +199,7 @@ public class CsvReaderTest { @Test public void customConfigTest() { final CsvReader reader = CsvUtil.getReader( - CsvReadConfig.defaultConfig() + CsvReadConfig.of() .setTextDelimiter('\'') .setFieldSeparator(';')); final CsvData csvRows = reader.readFromStr("123;456;'789;0'abc;"); @@ -211,7 +211,7 @@ public class CsvReaderTest { @Test public void readDisableCommentTest() { - final CsvReader reader = CsvUtil.getReader(CsvReadConfig.defaultConfig().disableComment()); + final CsvReader reader = CsvUtil.getReader(CsvReadConfig.of().disableComment()); final CsvData read = reader.read( ResourceUtil.getUtf8Reader("test.csv"), true); final CsvRow row = read.getRow(0); diff --git a/hutool-poi/src/test/java/org/dromara/hutool/poi/csv/CsvUtilTest.java b/hutool-poi/src/test/java/org/dromara/hutool/poi/csv/CsvUtilTest.java index 7918db440..cf78a4d81 100644 --- a/hutool-poi/src/test/java/org/dromara/hutool/poi/csv/CsvUtilTest.java +++ b/hutool-poi/src/test/java/org/dromara/hutool/poi/csv/CsvUtilTest.java @@ -48,11 +48,12 @@ public class CsvUtilTest { Assertions.assertEquals("关注\"对象\"", row0.get(3)); Assertions.assertEquals("年龄", row0.get(4)); Assertions.assertEquals("", row0.get(5)); - Assertions.assertEquals("\"", row0.get(6)); + // 由于"""未闭合包装,因此末尾的换行符被当作包装内的内容,相当于:"""\n",转义后就是"\n + Assertions.assertEquals("\"\n", row0.get(6)); } @Test - public void readTest2() { + public void readUseConsumerTest() { final CsvReader reader = CsvUtil.getReader(); reader.read(FileUtil.getUtf8Reader("test.csv"), true, (csvRow)-> { // 只有一行,所以直接判断 @@ -62,7 +63,8 @@ public class CsvUtilTest { Assertions.assertEquals("关注\"对象\"", 
csvRow.get(3)); Assertions.assertEquals("年龄", csvRow.get(4)); Assertions.assertEquals("", csvRow.get(5)); - Assertions.assertEquals("\"", csvRow.get(6)); + // 由于"""未闭合包装,因此末尾的换行符被当作包装内的内容,相当于:"""\n",转义后就是"\n + Assertions.assertEquals("\"\n", csvRow.get(6)); }); } @@ -75,7 +77,7 @@ public class CsvUtilTest { } @Test - public void readCsvStr1(){ + public void readCsvStr1WithUncloseTest(){ final CsvData data = CsvUtil.getReader().readFromStr("# 这是一行注释,读取时应忽略\n" + "\"sss,sss\",姓名,\"性别\",关注\"对象\",年龄,\"\",\"\"\"\n"); final List rows = data.getRows(); @@ -86,11 +88,28 @@ public class CsvUtilTest { Assertions.assertEquals("关注\"对象\"", row0.get(3)); Assertions.assertEquals("年龄", row0.get(4)); Assertions.assertEquals("", row0.get(5)); + // 由于"""未闭合包装,因此末尾的换行符被当作包装内的内容,相当于:"""\n",转义后就是"\n + Assertions.assertEquals("\"\n", row0.get(6)); + } + + @Test + public void readCsvStr1WithUncloseTrimTest(){ + final CsvData data = CsvUtil.getReader(CsvReadConfig.of().setTrimField(true)) + .readFromStr("# 这是一行注释,读取时应忽略\n" + + "\"sss,sss\",姓名,\"性别\",关注\"对象\",年龄,\"\",\"\"\"\n"); + final List rows = data.getRows(); + final CsvRow row0 = rows.get(0); + Assertions.assertEquals("sss,sss", row0.get(0)); + Assertions.assertEquals("姓名", row0.get(1)); + Assertions.assertEquals("性别", row0.get(2)); + Assertions.assertEquals("关注\"对象\"", row0.get(3)); + Assertions.assertEquals("年龄", row0.get(4)); + Assertions.assertEquals("", row0.get(5)); Assertions.assertEquals("\"", row0.get(6)); } @Test - public void readCsvStr2(){ + public void readCsvStrUseConsumerTest(){ CsvUtil.getReader().readFromStr("# 这是一行注释,读取时应忽略\n" + "\"sss,sss\",姓名,\"性别\",关注\"对象\",年龄,\"\",\"\"\"\n",(csvRow)-> { // 只有一行,所以直接判断 @@ -100,7 +119,8 @@ public class CsvUtilTest { Assertions.assertEquals("关注\"对象\"", csvRow.get(3)); Assertions.assertEquals("年龄", csvRow.get(4)); Assertions.assertEquals("", csvRow.get(5)); - Assertions.assertEquals("\"", csvRow.get(6)); + // 由于"""未闭合包装,因此末尾的换行符被当作包装内的内容,相当于:"""\n",转义后就是"\n + 
Assertions.assertEquals("\"\n", csvRow.get(6)); }); }