This commit is contained in:
Looly
2022-04-28 01:30:17 +08:00
parent e0ac5e9961
commit d78219c60c
248 changed files with 621 additions and 3407 deletions

View File

@@ -9,7 +9,7 @@
<parent>
<groupId>cn.hutool</groupId>
<artifactId>hutool-parent</artifactId>
<version>5.8.0.M5</version>
<version>6.0.0.M1</version>
</parent>
<artifactId>hutool-poi</artifactId>

View File

@@ -0,0 +1,280 @@
package cn.hutool.poi.csv;
import cn.hutool.core.io.FileUtil;
import cn.hutool.core.io.IORuntimeException;
import cn.hutool.core.io.IoUtil;
import cn.hutool.core.lang.Assert;
import cn.hutool.core.util.CharsetUtil;
import cn.hutool.core.util.ObjectUtil;
import java.io.File;
import java.io.Reader;
import java.io.Serializable;
import java.io.StringReader;
import java.nio.charset.Charset;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Objects;
/**
 * Base class for CSV readers. Reads CSV content from files, paths, strings or {@link Reader}s;
 * a single instance can be used for several reads of different sources. Modeled on FastCSV.
 *
 * @author Looly
 * @since 5.0.4
 */
public class CsvBaseReader implements Serializable {
	private static final long serialVersionUID = 1L;

	/**
	 * Default charset (UTF-8), used by the overloads that take no charset
	 */
	protected static final Charset DEFAULT_CHARSET = CharsetUtil.CHARSET_UTF_8;

	private final CsvReadConfig config;

	//--------------------------------------------------------------------------------------------- Constructor start

	/**
	 * Constructor using the default configuration
	 */
	public CsvBaseReader() {
		this(null);
	}

	/**
	 * Constructor
	 *
	 * @param config read configuration; {@code null} selects {@link CsvReadConfig#defaultConfig()}
	 */
	public CsvBaseReader(CsvReadConfig config) {
		this.config = ObjectUtil.defaultIfNull(config, CsvReadConfig::defaultConfig);
	}
	//--------------------------------------------------------------------------------------------- Constructor end

	/**
	 * Sets the field separator, comma ',' by default
	 *
	 * @param fieldSeparator field separator
	 */
	public void setFieldSeparator(char fieldSeparator) {
		this.config.setFieldSeparator(fieldSeparator);
	}

	/**
	 * Sets the text delimiter (the character that wraps a field), double quote '"' by default
	 *
	 * @param textDelimiter text delimiter
	 */
	public void setTextDelimiter(char textDelimiter) {
		this.config.setTextDelimiter(textDelimiter);
	}

	/**
	 * Sets whether the first line is treated as the header line, {@code false} by default
	 *
	 * @param containsHeader whether the first line is the header line
	 */
	public void setContainsHeader(boolean containsHeader) {
		this.config.setContainsHeader(containsHeader);
	}

	/**
	 * Sets whether empty rows are skipped, {@code true} by default
	 *
	 * @param skipEmptyRows whether empty rows are skipped
	 */
	public void setSkipEmptyRows(boolean skipEmptyRows) {
		this.config.setSkipEmptyRows(skipEmptyRows);
	}

	/**
	 * Sets whether an exception is thrown when rows have different field counts, {@code false} by default
	 *
	 * @param errorOnDifferentFieldCount whether differing field counts are an error
	 */
	public void setErrorOnDifferentFieldCount(boolean errorOnDifferentFieldCount) {
		this.config.setErrorOnDifferentFieldCount(errorOnDifferentFieldCount);
	}

	/**
	 * Reads a CSV file using the default charset (UTF-8)
	 *
	 * @param file CSV file
	 * @return {@link CsvData} holding the header and the rows
	 * @throws IORuntimeException on IO errors
	 */
	public CsvData read(File file) throws IORuntimeException {
		return read(file, DEFAULT_CHARSET);
	}

	/**
	 * Reads CSV data from a string
	 *
	 * @param csvStr CSV string
	 * @return {@link CsvData} holding the header and the rows
	 */
	public CsvData readFromStr(String csvStr) {
		return read(new StringReader(csvStr));
	}

	/**
	 * Reads CSV data from a string, handing each row to the given handler
	 *
	 * @param csvStr     CSV string
	 * @param rowHandler row handler, called once per row
	 */
	public void readFromStr(String csvStr, CsvRowHandler rowHandler) {
		read(parse(new StringReader(csvStr)), rowHandler);
	}

	/**
	 * Reads a CSV file
	 *
	 * @param file    CSV file, must not be {@code null}
	 * @param charset file charset
	 * @return {@link CsvData} holding the header and the rows
	 * @throws IORuntimeException   on IO errors
	 * @throws NullPointerException if {@code file} is {@code null}
	 */
	public CsvData read(File file, Charset charset) throws IORuntimeException {
		// Check the File itself: checking file.toPath() would dereference a null file
		// before the descriptive message could ever be used.
		return read(Objects.requireNonNull(file, "file must not be null").toPath(), charset);
	}

	/**
	 * Reads a CSV file using the default charset (UTF-8)
	 *
	 * @param path CSV file
	 * @return {@link CsvData} holding the header and the rows
	 * @throws IORuntimeException on IO errors
	 */
	public CsvData read(Path path) throws IORuntimeException {
		return read(path, DEFAULT_CHARSET);
	}

	/**
	 * Reads a CSV file
	 *
	 * @param path    CSV file, must not be {@code null}
	 * @param charset file charset
	 * @return {@link CsvData} holding the header and the rows
	 * @throws IORuntimeException on IO errors
	 */
	public CsvData read(Path path, Charset charset) throws IORuntimeException {
		Assert.notNull(path, "path must not be null");
		return read(FileUtil.getReader(path, charset));
	}

	/**
	 * Reads CSV data from a Reader; the Reader is closed after reading
	 *
	 * @param reader Reader
	 * @return {@link CsvData} holding the header and the rows
	 * @throws IORuntimeException on IO errors
	 */
	public CsvData read(Reader reader) throws IORuntimeException {
		final CsvParser csvParser = parse(reader);
		final List<CsvRow> rows = new ArrayList<>();
		read(csvParser, rows::add);
		// The header is only asked for when a header line is configured
		final List<String> header = config.headerLineNo > -1 ? csvParser.getHeader() : null;
		return new CsvData(header, rows);
	}

	/**
	 * Reads CSV data from a Reader into a list of Maps (header name to field value);
	 * the Reader is closed after reading.<br>
	 * This method always treats the first line as the header line, and it changes the
	 * configuration of this reader accordingly.
	 *
	 * @param reader Reader
	 * @return one Map per row
	 * @throws IORuntimeException on IO errors
	 */
	public List<Map<String, String>> readMapList(Reader reader) throws IORuntimeException {
		// a header is mandatory for this method
		this.config.setContainsHeader(true);

		final List<Map<String, String>> result = new ArrayList<>();
		read(reader, (row) -> result.add(row.getFieldMap()));
		return result;
	}

	/**
	 * Reads CSV data from a Reader and converts every row to a bean;
	 * the Reader is closed after reading.<br>
	 * This method always treats the first line as the header line, and it changes the
	 * configuration of this reader accordingly.
	 *
	 * @param <T>    bean type
	 * @param reader Reader
	 * @param clazz  bean class
	 * @return list of beans
	 */
	public <T> List<T> read(Reader reader, Class<T> clazz) {
		// a header is mandatory for this method
		this.config.setContainsHeader(true);

		final List<T> result = new ArrayList<>();
		read(reader, (row) -> result.add(row.toBean(clazz)));
		return result;
	}

	/**
	 * Reads CSV data from a string and converts every row to a bean.<br>
	 * This method always treats the first line as the header line, and it changes the
	 * configuration of this reader accordingly.
	 *
	 * @param <T>    bean type
	 * @param csvStr CSV string
	 * @param clazz  bean class
	 * @return list of beans
	 */
	public <T> List<T> read(String csvStr, Class<T> clazz) {
		// a header is mandatory for this method
		this.config.setContainsHeader(true);

		final List<T> result = new ArrayList<>();
		read(new StringReader(csvStr), (row) -> result.add(row.toBean(clazz)));
		return result;
	}

	/**
	 * Reads CSV data from a Reader, handing each row to the given handler;
	 * the Reader is closed after reading
	 *
	 * @param reader     Reader
	 * @param rowHandler row handler, called once per row
	 * @throws IORuntimeException on IO errors
	 */
	public void read(Reader reader, CsvRowHandler rowHandler) throws IORuntimeException {
		read(parse(reader), rowHandler);
	}

	//--------------------------------------------------------------------------------------------- Private method start

	/**
	 * Reads all rows from the parser and closes the parser afterwards, even if a handler throws
	 *
	 * @param csvParser  CSV parser
	 * @param rowHandler row handler, called once per row
	 * @throws IORuntimeException on IO errors
	 * @since 5.0.4
	 */
	private void read(CsvParser csvParser, CsvRowHandler rowHandler) throws IORuntimeException {
		try {
			while (csvParser.hasNext()) {
				rowHandler.handle(csvParser.next());
			}
		} finally {
			IoUtil.close(csvParser);
		}
	}

	/**
	 * Builds a {@link CsvParser} for the given Reader using this reader's configuration
	 *
	 * @param reader Reader
	 * @return CsvParser
	 * @throws IORuntimeException on IO errors
	 */
	protected CsvParser parse(Reader reader) throws IORuntimeException {
		return new CsvParser(reader, this.config);
	}
	//--------------------------------------------------------------------------------------------- Private method end
}

View File

@@ -0,0 +1,119 @@
package cn.hutool.poi.csv;
import cn.hutool.core.util.CharUtil;
import java.io.Serializable;
import java.util.LinkedHashMap;
import java.util.Map;
/**
 * Base CSV configuration shared by reading and writing. Defines symbols such as the
 * field separator, the text delimiter and the comment character.
 *
 * @param <T> type of the concrete subclass, used so setters can return {@code this} typed as the subclass
 * @author looly
 * @since 4.0.5
 */
@SuppressWarnings("unchecked")
public class CsvConfig<T extends CsvConfig<T>> implements Serializable {
	private static final long serialVersionUID = -8069578249066158459L;

	/**
	 * Field separator, comma ',' by default
	 */
	protected char fieldSeparator = CharUtil.COMMA;
	/**
	 * Text delimiter (wraps a field), double quote '"' by default
	 */
	protected char textDelimiter = CharUtil.DOUBLE_QUOTES;
	/**
	 * Comment character marking comment lines, '#' by default; {@code null} disables comments
	 */
	protected Character commentCharacter = '#';
	/**
	 * Header aliases (header name to alias); may be {@code null} after {@link #setHeaderAlias(Map)}
	 */
	protected Map<String, String> headerAlias = new LinkedHashMap<>();

	/**
	 * Sets the field separator, comma ',' by default
	 *
	 * @param fieldSeparator field separator
	 * @return this
	 */
	public T setFieldSeparator(final char fieldSeparator) {
		this.fieldSeparator = fieldSeparator;
		return (T) this;
	}

	/**
	 * Sets the text delimiter (the character that wraps a field), double quote '"' by default
	 *
	 * @param textDelimiter text delimiter
	 * @return this
	 */
	public T setTextDelimiter(char textDelimiter) {
		this.textDelimiter = textDelimiter;
		return (T) this;
	}

	/**
	 * Disables comments by setting the comment character to {@code null}.<br>
	 * According to the original documentation, {@link CsvWriter#writeComment(String)} then throws
	 * when writing, and comment lines are read as ordinary lines when reading.
	 *
	 * @return this
	 * @since 5.7.14
	 */
	public T disableComment() {
		return setCommentCharacter(null);
	}

	/**
	 * Sets the comment character used to recognize comment lines; {@code null} means comments are ignored
	 *
	 * @param commentCharacter comment character, may be {@code null}
	 * @return this
	 * @since 5.5.7
	 */
	public T setCommentCharacter(Character commentCharacter) {
		this.commentCharacter = commentCharacter;
		return (T) this;
	}

	/**
	 * Replaces the header alias Map. The given Map is stored as-is (not copied).
	 *
	 * @param headerAlias alias Map, may be {@code null}
	 * @return this
	 * @since 5.7.10
	 */
	public T setHeaderAlias(Map<String, String> headerAlias) {
		this.headerAlias = headerAlias;
		return (T) this;
	}

	/**
	 * Adds a header alias. If the alias Map was previously set to {@code null}, a new Map is created.
	 *
	 * @param header header name
	 * @param alias  alias
	 * @return this
	 * @since 5.7.10
	 */
	public T addHeaderAlias(String header, String alias) {
		// setHeaderAlias(null) is allowed, so recreate the Map instead of failing with an NPE
		if (null == this.headerAlias) {
			this.headerAlias = new LinkedHashMap<>();
		}
		this.headerAlias.put(header, alias);
		return (T) this;
	}

	/**
	 * Removes a header alias. Does nothing when no alias Map is set.
	 *
	 * @param header header name
	 * @return this
	 * @since 5.7.10
	 */
	public T removeHeaderAlias(String header) {
		if (null != this.headerAlias) {
			this.headerAlias.remove(header);
		}
		return (T) this;
	}
}

View File

@@ -0,0 +1,80 @@
package cn.hutool.poi.csv;
import java.io.Serializable;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
/**
 * CSV data consisting of optional header information and the data rows. Modeled on FastCSV.
 *
 * @author Looly
 */
public class CsvData implements Iterable<CsvRow>, Serializable {
	private static final long serialVersionUID = 1L;

	private final List<String> header;
	private final List<CsvRow> rows;

	/**
	 * Constructor
	 *
	 * @param header header fields, may be {@code null}
	 * @param rows   rows
	 */
	public CsvData(final List<String> header, final List<CsvRow> rows) {
		this.header = header;
		this.rows = rows;
	}

	/**
	 * Returns the number of rows
	 *
	 * @return number of rows
	 */
	public int getRowCount() {
		return this.rows.size();
	}

	/**
	 * Returns the header fields as a read-only list, or {@code null} if there is no header
	 *
	 * @return the header row - might be {@code null} if no header exists
	 */
	public List<String> getHeader() {
		// Collections.unmodifiableList rejects null, so the documented "no header" case is handled first
		if (null == this.header) {
			return null;
		}
		return Collections.unmodifiableList(this.header);
	}

	/**
	 * Returns the row at the given index, starting at 0
	 *
	 * @param index row index
	 * @return row data
	 * @throws IndexOutOfBoundsException if index is out of range
	 */
	public CsvRow getRow(final int index) {
		return this.rows.get(index);
	}

	/**
	 * Returns all rows (the internal list, not a copy)
	 *
	 * @return all rows
	 */
	public List<CsvRow> getRows() {
		return this.rows;
	}

	@Override
	public Iterator<CsvRow> iterator() {
		return this.rows.iterator();
	}

	@Override
	public String toString() {
		return "CsvData{" +
				"header=" + header +
				", rows=" + rows +
				'}';
	}
}

View File

@@ -0,0 +1,447 @@
package cn.hutool.poi.csv;
import cn.hutool.core.collection.ComputeIter;
import cn.hutool.core.io.IORuntimeException;
import cn.hutool.core.io.IoUtil;
import cn.hutool.core.map.MapUtil;
import cn.hutool.core.text.StrBuilder;
import cn.hutool.core.util.CharUtil;
import cn.hutool.core.util.ObjectUtil;
import cn.hutool.core.util.StrUtil;
import java.io.Closeable;
import java.io.IOException;
import java.io.Reader;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
/**
 * CSV row parser that reads rows one at a time from a {@link Reader}. Modeled on FastCSV.
 * <p>
 * Rows are exposed through the iterator contract of {@link ComputeIter} (see {@link #computeNext()})
 * or directly via {@link #nextRow()}.
 *
 * @author Looly
 */
public final class CsvParser extends ComputeIter<CsvRow> implements Closeable, Serializable {
	private static final long serialVersionUID = 1L;

	private static final int DEFAULT_ROW_CAPACITY = 10;

	private final Reader reader;
	private final CsvReadConfig config;

	private final Buffer buf = new Buffer(IoUtil.DEFAULT_LARGE_BUFFER_SIZE);
	/**
	 * Previously read character, -1 before anything was read
	 */
	private int preChar = -1;
	/**
	 * Whether the parser is currently inside a quoted (text-delimited) section
	 */
	private boolean inQuotes;
	/**
	 * Field currently being read
	 */
	private final StrBuilder currentField = new StrBuilder(512);

	/**
	 * Header row, {@code null} until the header line has been read
	 */
	private CsvRow header;
	/**
	 * Current line number
	 */
	private long lineNo = -1;
	/**
	 * Number of line breaks seen inside quotes for the row read last
	 */
	private long inQuotesLineCount;
	/**
	 * Field count of the first row, used to check that all rows have the same number of fields
	 */
	private int firstLineFieldCount = -1;
	/**
	 * Largest field count seen so far, used to size new row lists and avoid resizing
	 */
	private int maxFieldCount;
	/**
	 * Whether the end of the input has been reached
	 */
	private boolean finished;

	/**
	 * Creates a CSV parser
	 *
	 * @param reader Reader, must not be {@code null}
	 * @param config configuration, {@code null} selects the default configuration
	 */
	public CsvParser(final Reader reader, CsvReadConfig config) {
		this.reader = Objects.requireNonNull(reader, "reader must not be null");
		this.config = ObjectUtil.defaultIfNull(config, CsvReadConfig::defaultConfig);
	}

	/**
	 * Returns the header fields. Throws if headerLineNo &lt; 0.
	 *
	 * @return header field list
	 * @throws IllegalStateException if header parsing is disabled, if no row has been read yet,
	 *                               or if the header line has not been found in the input read so far
	 */
	public List<String> getHeader() {
		if (config.headerLineNo < 0) {
			throw new IllegalStateException("No header available - header parsing is disabled");
		}
		if (lineNo < config.beginLineNo) {
			throw new IllegalStateException("No header available - call nextRow() first");
		}
		// The header stays null when the configured header line was never reached (e.g. empty input);
		// report that as the documented IllegalStateException instead of a NullPointerException.
		if (null == header) {
			throw new IllegalStateException("No header available - header line was not found");
		}
		return header.fields;
	}

	@Override
	protected CsvRow computeNext() {
		return nextRow();
	}

	/**
	 * Reads the next data row, applying the configured line range, empty-row skipping,
	 * field-count check and header handling
	 *
	 * @return the next CsvRow, or {@code null} when there are no more rows
	 * @throws IORuntimeException on IO errors or when field counts differ and that is configured as an error
	 */
	public CsvRow nextRow() throws IORuntimeException {
		List<String> currentFields;
		int fieldCount;
		while (false == finished) {
			currentFields = readLine();
			fieldCount = currentFields.size();
			if (fieldCount < 1) {
				// an empty List means the input is exhausted
				break;
			}

			// line range check
			if (lineNo < config.beginLineNo) {
				// begin line not reached yet, keep reading
				continue;
			}
			if (lineNo > config.endLineNo) {
				// past the end line, stop reading
				break;
			}

			// skip empty rows
			if (config.skipEmptyRows && fieldCount == 1 && currentFields.get(0).isEmpty()) {
				// [""] represents an empty row
				continue;
			}

			// check that every row has the same number of fields
			if (config.errorOnDifferentFieldCount) {
				if (firstLineFieldCount < 0) {
					firstLineFieldCount = fieldCount;
				} else if (fieldCount != firstLineFieldCount) {
					throw new IORuntimeException(String.format("Line %d has %d fields, but first line has %d fields", lineNo, fieldCount, firstLineFieldCount));
				}
			}

			// remember the largest field count
			if (fieldCount > maxFieldCount) {
				maxFieldCount = fieldCount;
			}

			// initialize the header
			if (lineNo == config.headerLineNo && null == header) {
				initHeader(currentFields);
				// the header line itself is not returned; the next line becomes the first row
				continue;
			}

			return new CsvRow(lineNo, null == header ? null : header.headerMap, currentFields);
		}

		return null;
	}

	/**
	 * Uses the current row as the header row. Configured aliases replace header names;
	 * empty names and repeated names are not added to the name-to-index map.
	 *
	 * @param currentFields fields of the current row
	 */
	private void initHeader(final List<String> currentFields) {
		final Map<String, Integer> localHeaderMap = new LinkedHashMap<>(currentFields.size());
		for (int i = 0; i < currentFields.size(); i++) {
			String field = currentFields.get(i);
			if (MapUtil.isNotEmpty(this.config.headerAlias)) {
				// custom alias
				field = ObjectUtil.defaultIfNull(this.config.headerAlias.get(field), field);
			}
			if (StrUtil.isNotEmpty(field) && false == localHeaderMap.containsKey(field)) {
				localHeaderMap.put(field, i);
			}
		}

		header = new CsvRow(this.lineNo, Collections.unmodifiableMap(localHeaderMap), Collections.unmodifiableList(currentFields));
	}

	/**
	 * Reads one row. Returns a List of size 0 when the input is exhausted;<br>
	 * an empty line is a List of size 1 whose only element is "".
	 *
	 * <p>
	 * Line numbers take comment lines and line breaks inside quoted content into account.
	 * </p>
	 *
	 * @return fields of one row
	 * @throws IORuntimeException on IO errors
	 */
	private List<String> readLine() throws IORuntimeException {
		// Correct the line number:
		// a row spanning several physical lines reports the number of its first line, but before
		// reading the next row the extra lines of the previous row have to be added.
		if (inQuotesLineCount > 0) {
			this.lineNo += this.inQuotesLineCount;
			this.inQuotesLineCount = 0;
		}

		final List<String> currentFields = new ArrayList<>(maxFieldCount > 0 ? maxFieldCount : DEFAULT_ROW_CAPACITY);

		final StrBuilder currentField = this.currentField;
		final Buffer buf = this.buf;
		int preChar = this.preChar;// previously read character
		int copyLen = 0; // number of pending characters to copy from the buffer
		boolean inComment = false;

		while (true) {
			if (false == buf.hasRemaining()) {
				// this buffer is exhausted, load the next chunk
				if (copyLen > 0) {
					buf.appendTo(currentField, copyLen);
					// no mark needed here, read() resets the mark
				}
				if (buf.read(this.reader) < 0) {
					// end of the CSV input
					finished = true;

					if (currentField.hasContent() || preChar == config.fieldSeparator) {
						// whatever is left becomes one field
						addField(currentFields, currentField.toStringAndReset());
					}
					break;
				}

				// reset
				copyLen = 0;
			}

			final char c = buf.get();

			// Comment line detection. A line start is either the start of the text (preChar < 0)
			// or the character right after a line break. A comment character at the start of a
			// physical line inside a quoted field is ordinary content, hence the inQuotes check.
			if (false == inQuotes && (preChar < 0 || preChar == CharUtil.CR || preChar == CharUtil.LF)) {
				if (null != this.config.commentCharacter && c == this.config.commentCharacter) {
					inComment = true;
				}
			}
			// comment line handling
			if (inComment) {
				if (c == CharUtil.CR || c == CharUtil.LF) {
					// a comment line ends at the line break
					lineNo++;
					inComment = false;
				}
				// skip every character of the comment line
				buf.mark();
				preChar = c;
				continue;
			}

			if (inQuotes) {
				// inside quotes everything is content until the closing quote
				if (c == config.textDelimiter) {
					// End of quoted text
					inQuotes = false;
				} else {
					// line break inside field content
					if (isLineEnd(c, preChar)) {
						inQuotesLineCount++;
					}
				}
				// ordinary field character
				copyLen++;
			} else {
				// outside quotes
				if (c == config.fieldSeparator) {
					// end of a field
					if (copyLen > 0) {
						buf.appendTo(currentField, copyLen);
						copyLen = 0;
					}
					buf.mark();
					addField(currentFields, currentField.toStringAndReset());
				} else if (c == config.textDelimiter) {
					// opening quote
					inQuotes = true;
					copyLen++;
				} else if (c == CharUtil.CR) {
					// \r ends the row
					if (copyLen > 0) {
						buf.appendTo(currentField, copyLen);
					}
					buf.mark();
					addField(currentFields, currentField.toStringAndReset());
					preChar = c;
					break;
				} else if (c == CharUtil.LF) {
					// \n
					if (preChar != CharUtil.CR) {
						if (copyLen > 0) {
							buf.appendTo(currentField, copyLen);
						}
						buf.mark();
						addField(currentFields, currentField.toStringAndReset());
						preChar = c;
						break;
					}
					// the previous character was \r, the row was already finished there; just skip
					buf.mark();
				} else {
					// ordinary character
					copyLen++;
				}
			}

			preChar = c;
		}

		// restore fields
		this.preChar = preChar;

		lineNo++;
		return currentFields;
	}

	@Override
	public void close() throws IOException {
		reader.close();
	}

	/**
	 * Adds a field to the field list after removing trailing line breaks, unwrapping the
	 * text delimiter and un-escaping doubled text delimiters; trims it if so configured
	 *
	 * @param currentFields current field list (i.e. the row)
	 * @param field         raw field
	 */
	private void addField(List<String> currentFields, String field) {
		final char textDelimiter = this.config.textDelimiter;

		// ignore line breaks following a superfluous quote
		field = StrUtil.trim(field, 1, (c -> c == CharUtil.LF || c == CharUtil.CR));
		field = StrUtil.unWrap(field, textDelimiter);
		field = StrUtil.replace(field, "" + textDelimiter + textDelimiter, textDelimiter + "");
		if (this.config.trimField) {
			// issue#I49M0C@Gitee
			field = StrUtil.trim(field);
		}
		currentFields.add(field);
	}

	/**
	 * Tells whether the character ends a line: it is \r or \n and the previous character is not \r
	 *
	 * @param c       character
	 * @param preChar previous character
	 * @return whether the character ends a line
	 * @since 5.7.4
	 */
	private boolean isLineEnd(char c, int preChar) {
		return (c == CharUtil.CR || c == CharUtil.LF) && preChar != CharUtil.CR;
	}

	/**
	 * Internal character buffer
	 *
	 * @author looly
	 */
	private static class Buffer implements Serializable {
		private static final long serialVersionUID = 1L;

		final char[] buf;

		/**
		 * Mark position, the start offset used by {@link #appendTo(StrBuilder, int)}
		 */
		private int mark;
		/**
		 * Current position
		 */
		private int position;
		/**
		 * Number of characters read into the buffer; -1 means no data
		 */
		private int limit;

		Buffer(int capacity) {
			buf = new char[capacity];
		}

		/**
		 * Tells whether unread data remains
		 *
		 * @return whether unread data remains
		 */
		public final boolean hasRemaining() {
			return position < limit;
		}

		/**
		 * Fills the buffer from the reader.<br>
		 * This is a full read that resets mark, position and limit.
		 *
		 * @param reader {@link Reader}
		 * @return number of characters read as reported by the reader
		 */
		int read(Reader reader) {
			int length;
			try {
				length = reader.read(this.buf);
			} catch (IOException e) {
				throw new IORuntimeException(e);
			}
			this.mark = 0;
			this.position = 0;
			this.limit = length;
			return length;
		}

		/**
		 * Returns the character at the current position, then advances the position by one.<br>
		 * This method does not check for the end of the data; use {@link #hasRemaining()} first.
		 *
		 * @return character at the current position
		 * @see #hasRemaining()
		 */
		char get() {
			return this.buf[this.position++];
		}

		/**
		 * Sets the mark to the current position, which becomes the start of the next copy
		 */
		void mark() {
			this.mark = this.position;
		}

		/**
		 * Appends data starting at the mark to the {@link StrBuilder}; afterwards {@link #mark()}
		 * has to be called manually to reset the copy start
		 *
		 * @param builder {@link StrBuilder}
		 * @param length  number of characters to append
		 * @see #mark()
		 */
		void appendTo(StrBuilder builder, int length) {
			builder.append(this.buf, this.mark, length);
		}
	}
}

View File

@@ -0,0 +1,118 @@
package cn.hutool.poi.csv;
import java.io.Serializable;
/**
 * Configuration options for reading CSV
 *
 * @author looly
 */
public class CsvReadConfig extends CsvConfig<CsvReadConfig> implements Serializable {
	private static final long serialVersionUID = 5396453565371560052L;

	/**
	 * Line number of the header line, -1 means there is no header line
	 */
	protected long headerLineNo = -1;
	/**
	 * Whether empty rows are skipped, {@code true} by default
	 */
	protected boolean skipEmptyRows = true;
	/**
	 * Whether differing field counts between rows raise an exception, {@code false} by default
	 */
	protected boolean errorOnDifferentFieldCount;
	/**
	 * First line to read (inclusive), as a line number of the original file
	 */
	protected long beginLineNo;
	/**
	 * Last line to read (inclusive), as a line number of the original file
	 */
	protected long endLineNo = Long.MAX_VALUE - 1;
	/**
	 * Whether surrounding whitespace is removed from every field
	 */
	protected boolean trimField;

	/**
	 * Creates a configuration with default values
	 *
	 * @return default configuration
	 */
	public static CsvReadConfig defaultConfig() {
		return new CsvReadConfig();
	}

	/**
	 * Sets whether there is a header line, {@code false} by default.<br>
	 * {@code true} sets the header line number to the current value of {@link #beginLineNo};
	 * {@code false} sets it to -1, meaning no header line.
	 *
	 * @param containsHeader whether the first line is the header line
	 * @return this
	 * @see #setHeaderLineNo(long)
	 */
	public CsvReadConfig setContainsHeader(final boolean containsHeader) {
		final long lineNo;
		if (containsHeader) {
			lineNo = this.beginLineNo;
		} else {
			lineNo = -1;
		}
		return setHeaderLineNo(lineNo);
	}

	/**
	 * Sets the header line number, -1 (the default) means no header line
	 *
	 * @param headerLineNo header line number, -1 for no header line
	 * @return this
	 * @since 5.7.23
	 */
	public CsvReadConfig setHeaderLineNo(final long headerLineNo) {
		this.headerLineNo = headerLineNo;
		return this;
	}

	/**
	 * Sets whether empty rows are skipped, {@code true} by default
	 *
	 * @param skipEmptyRows whether empty rows are skipped
	 * @return this
	 */
	public CsvReadConfig setSkipEmptyRows(final boolean skipEmptyRows) {
		this.skipEmptyRows = skipEmptyRows;
		return this;
	}

	/**
	 * Sets whether differing field counts between rows raise an exception, {@code false} by default
	 *
	 * @param errorOnDifferentFieldCount whether differing field counts are an error
	 * @return this
	 */
	public CsvReadConfig setErrorOnDifferentFieldCount(final boolean errorOnDifferentFieldCount) {
		this.errorOnDifferentFieldCount = errorOnDifferentFieldCount;
		return this;
	}

	/**
	 * Sets the first line to read (inclusive), 0 by default; this is a line number of the original file
	 *
	 * @param beginLineNo first line number (inclusive)
	 * @return this
	 * @since 5.7.4
	 */
	public CsvReadConfig setBeginLineNo(final long beginLineNo) {
		this.beginLineNo = beginLineNo;
		return this;
	}

	/**
	 * Sets the last line to read (inclusive), effectively unlimited by default;
	 * this is a line number of the original file
	 *
	 * @param endLineNo last line number (inclusive)
	 * @return this
	 * @since 5.7.4
	 */
	public CsvReadConfig setEndLineNo(final long endLineNo) {
		this.endLineNo = endLineNo;
		return this;
	}

	/**
	 * Sets whether surrounding whitespace is removed from every field
	 *
	 * @param trimField whether to trim fields
	 * @return this
	 * @since 5.7.13
	 */
	public CsvReadConfig setTrimField(final boolean trimField) {
		this.trimField = trimField;
		return this;
	}
}

View File

@@ -0,0 +1,153 @@
package cn.hutool.poi.csv;
import cn.hutool.core.io.FileUtil;
import cn.hutool.core.io.IORuntimeException;
import cn.hutool.core.io.IoUtil;
import java.io.Closeable;
import java.io.File;
import java.io.IOException;
import java.io.Reader;
import java.nio.charset.Charset;
import java.nio.file.Path;
import java.util.Iterator;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;
/**
 * CSV reader bound to an optional default source. Modeled on FastCSV.
 * <p>
 * In addition to the source-taking methods of {@link CsvBaseReader}, a Reader given at
 * construction time can be read via {@link #read()}, {@link #read(CsvRowHandler)},
 * {@link #stream()} or {@link #iterator()}.
 *
 * @author Looly
 * @since 4.0.1
 */
public class CsvReader extends CsvBaseReader implements Iterable<CsvRow>, Closeable {
	private static final long serialVersionUID = 1L;

	private final Reader reader;

	//--------------------------------------------------------------------------------------------- Constructor start

	/**
	 * Constructor using the default configuration and no default source
	 */
	public CsvReader() {
		this(null);
	}

	/**
	 * Constructor without a default source
	 *
	 * @param config configuration, {@code null} selects the default configuration
	 */
	public CsvReader(CsvReadConfig config) {
		this((Reader) null, config);
	}

	/**
	 * Constructor using the {@link #DEFAULT_CHARSET} charset
	 *
	 * @param file   CSV file, {@code null} means no default source is set
	 * @param config configuration, {@code null} selects the default configuration
	 * @since 5.0.4
	 */
	public CsvReader(File file, CsvReadConfig config) {
		this(file, DEFAULT_CHARSET, config);
	}

	/**
	 * Constructor using the {@link #DEFAULT_CHARSET} charset
	 *
	 * @param path   CSV file path, {@code null} means no default source is set
	 * @param config configuration, {@code null} selects the default configuration
	 * @since 5.0.4
	 */
	public CsvReader(Path path, CsvReadConfig config) {
		this(path, DEFAULT_CHARSET, config);
	}

	/**
	 * Constructor
	 *
	 * @param file    CSV file, {@code null} means no default source is set
	 * @param charset charset
	 * @param config  configuration, {@code null} selects the default configuration
	 * @since 5.0.4
	 */
	public CsvReader(File file, Charset charset, CsvReadConfig config) {
		// Honor the documented "null means no source": do not try to open a null file
		this(null == file ? (Reader) null : FileUtil.getReader(file, charset), config);
	}

	/**
	 * Constructor
	 *
	 * @param path    CSV file path, {@code null} means no default source is set
	 * @param charset charset
	 * @param config  configuration, {@code null} selects the default configuration
	 * @since 5.0.4
	 */
	public CsvReader(Path path, Charset charset, CsvReadConfig config) {
		// Honor the documented "null means no source": do not try to open a null path
		this(null == path ? (Reader) null : FileUtil.getReader(path, charset), config);
	}

	/**
	 * Constructor
	 *
	 * @param reader {@link Reader}, {@code null} means no default source is set
	 * @param config configuration, {@code null} selects the default configuration
	 * @since 5.0.4
	 */
	public CsvReader(Reader reader, CsvReadConfig config) {
		super(config);
		this.reader = reader;
	}
	//--------------------------------------------------------------------------------------------- Constructor end

	/**
	 * Reads the default source; this method can only be called once.<br>
	 * Requires that a file, path or Reader was passed to the constructor.
	 *
	 * @return {@link CsvData} holding the header and the rows
	 * @throws IORuntimeException on IO errors
	 */
	public CsvData read() throws IORuntimeException {
		return read(this.reader);
	}

	/**
	 * Reads the default source row by row; this method can only be called once.<br>
	 * Requires that a file, path or Reader was passed to the constructor.
	 *
	 * @param rowHandler row handler, called once per row
	 * @throws IORuntimeException on IO errors
	 * @since 5.0.4
	 */
	public void read(CsvRowHandler rowHandler) throws IORuntimeException {
		read(this.reader, rowHandler);
	}

	/**
	 * Creates a sequential {@link Stream} over the rows of the default source.
	 * Closing the stream closes this reader.
	 *
	 * @return {@link Stream}
	 * @since 5.7.14
	 */
	public Stream<CsvRow> stream() {
		return StreamSupport.stream(spliterator(), false)
				.onClose(() -> {
					try {
						close();
					} catch (final IOException e) {
						throw new IORuntimeException(e);
					}
				});
	}

	/**
	 * Returns a new parser over the default source, which iterates its rows
	 */
	@Override
	public Iterator<CsvRow> iterator() {
		return parse(this.reader);
	}

	/**
	 * Closes the default source
	 */
	@Override
	public void close() throws IOException {
		IoUtil.close(this.reader);
	}
}

View File

@@ -0,0 +1,267 @@
package cn.hutool.poi.csv;
import cn.hutool.core.bean.BeanUtil;
import cn.hutool.core.lang.Assert;
import java.util.Collection;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.ListIterator;
import java.util.Map;
/**
 * One row of a CSV file. Acts as a {@link List} of field values that delegates to the
 * wrapped field list, with optional access to fields by header name.
 *
 * @author Looly
 */
public final class CsvRow implements List<String> {

	/**
	 * Original line number
	 */
	private final long originalLineNumber;

	final Map<String, Integer> headerMap;
	final List<String> fields;

	/**
	 * Constructor
	 *
	 * @param originalLineNumber line of the file this row corresponds to
	 * @param headerMap          header Map (header name to column index), may be {@code null}
	 * @param fields             field values, must not be {@code null}
	 */
	public CsvRow(long originalLineNumber, Map<String, Integer> headerMap, List<String> fields) {
		Assert.notNull(fields, "fields must be not null!");
		this.originalLineNumber = originalLineNumber;
		this.headerMap = headerMap;
		this.fields = fields;
	}

	/**
	 * Returns the original line number; for a row spanning several lines this is the first line.
	 * Comment lines are ignored.
	 *
	 * @return the original line number
	 */
	public long getOriginalLineNumber() {
		return this.originalLineNumber;
	}

	/**
	 * Returns the field value belonging to a header name
	 *
	 * @param name header name
	 * @return field value, {@code null} if there is no such field.
	 *         Fails the {@code Assert.notNull} check when the row has no header.
	 */
	public String getByName(String name) {
		Assert.notNull(this.headerMap, "No header available!");

		final Integer column = this.headerMap.get(name);
		if (null == column) {
			return null;
		}
		return get(column);
	}

	/**
	 * Returns the list of all field values of this row (the internal list, not a copy)
	 *
	 * @return field value list
	 */
	public List<String> getRawList() {
		return this.fields;
	}

	/**
	 * Builds a Map from header name to field value, in header order
	 *
	 * @return Map from header name to field value
	 * @throws IllegalStateException if the CSV has no header line
	 */
	public Map<String, String> getFieldMap() {
		if (null == this.headerMap) {
			throw new IllegalStateException("No header available");
		}

		final Map<String, String> result = new LinkedHashMap<>(this.headerMap.size(), 1);
		// a missing column index yields a null value; get() also yields null past the end of the row
		this.headerMap.forEach((title, column) -> result.put(title, null == column ? null : get(column)));
		return result;
	}

	/**
	 * Converts this row to a bean
	 *
	 * @param <T>   bean type
	 * @param clazz bean class
	 * @return bean
	 * @since 5.3.6
	 */
	public <T> T toBean(Class<T> clazz) {
		final Map<String, String> values = getFieldMap();
		return BeanUtil.toBeanIgnoreError(values, clazz);
	}

	/**
	 * Returns the number of fields
	 *
	 * @return number of fields
	 */
	public int getFieldCount() {
		return this.fields.size();
	}

	@Override
	public int size() {
		return this.fields.size();
	}

	@Override
	public boolean isEmpty() {
		return this.fields.isEmpty();
	}

	@Override
	public boolean contains(Object o) {
		return this.fields.contains(o);
	}

	@Override
	public Iterator<String> iterator() {
		return this.fields.iterator();
	}

	@Override
	public Object[] toArray() {
		return this.fields.toArray();
	}

	@Override
	public <T> T[] toArray(T[] a) {
		//noinspection SuspiciousToArrayCall
		return this.fields.toArray(a);
	}

	@Override
	public boolean add(String e) {
		return this.fields.add(e);
	}

	@Override
	public boolean remove(Object o) {
		return this.fields.remove(o);
	}

	@Override
	public boolean containsAll(Collection<?> c) {
		return this.fields.containsAll(c);
	}

	@Override
	public boolean addAll(Collection<? extends String> c) {
		return this.fields.addAll(c);
	}

	@Override
	public boolean addAll(int index, Collection<? extends String> c) {
		return this.fields.addAll(index, c);
	}

	@Override
	public boolean removeAll(Collection<?> c) {
		return this.fields.removeAll(c);
	}

	@Override
	public boolean retainAll(Collection<?> c) {
		return this.fields.retainAll(c);
	}

	@Override
	public void clear() {
		this.fields.clear();
	}

	/**
	 * Returns the field at the given index, or {@code null} when the index is at or past the end of the row
	 */
	@Override
	public String get(int index) {
		if (index >= this.fields.size()) {
			return null;
		}
		return this.fields.get(index);
	}

	@Override
	public String set(int index, String element) {
		return this.fields.set(index, element);
	}

	@Override
	public void add(int index, String element) {
		this.fields.add(index, element);
	}

	@Override
	public String remove(int index) {
		return this.fields.remove(index);
	}

	@Override
	public int indexOf(Object o) {
		return this.fields.indexOf(o);
	}

	@Override
	public int lastIndexOf(Object o) {
		return this.fields.lastIndexOf(o);
	}

	@Override
	public ListIterator<String> listIterator() {
		return this.fields.listIterator();
	}

	@Override
	public ListIterator<String> listIterator(int index) {
		return this.fields.listIterator(index);
	}

	@Override
	public List<String> subList(int fromIndex, int toIndex) {
		return this.fields.subList(fromIndex, toIndex);
	}

	@Override
	public String toString() {
		final StringBuilder text = new StringBuilder("CsvRow{")
				.append("originalLineNumber=")
				.append(this.originalLineNumber)
				.append(", ")
				.append("fields=");

		if (null == this.headerMap) {
			// no header: print the raw field list
			text.append(this.fields.toString());
		} else {
			// with header: print name=value pairs, a null value prints as nothing
			text.append('{');
			boolean first = true;
			for (final Map.Entry<String, String> pair : getFieldMap().entrySet()) {
				if (false == first) {
					text.append(", ");
				}
				first = false;
				text.append(pair.getKey()).append('=');
				if (null != pair.getValue()) {
					text.append(pair.getValue());
				}
			}
			text.append('}');
		}

		return text.append('}').toString();
	}
}

View File

@@ -0,0 +1,18 @@
package cn.hutool.poi.csv;
/**
* Row handler for CSV data; implement this interface to process data row by row.
* Being a functional interface, it can be supplied as a lambda or method reference.
*
* @author Looly
* @since 5.0.4
*/
@FunctionalInterface
public interface CsvRowHandler {
/**
* Handles one row of data
*
* @param row the row data
*/
void handle(CsvRow row);
}

View File

@@ -0,0 +1,141 @@
package cn.hutool.poi.csv;
import java.io.File;
import java.io.Reader;
import java.io.Writer;
import java.nio.charset.Charset;
/**
* CSV utilities: static factory methods that create {@link CsvReader} and {@link CsvWriter} instances
*
* @author looly
* @since 4.0.5
*/
public class CsvUtil {
//----------------------------------------------------------------------------------------------------------- Reader
/**
* Creates a CSV reader without a default source; the resource to read has to be passed
* to the reader's read methods
*
* @param config configuration, may be {@code null}
* @return {@link CsvReader}
*/
public static CsvReader getReader(CsvReadConfig config) {
return new CsvReader(config);
}
/**
* Creates a CSV reader without a default source; the resource to read has to be passed
* to the reader's read methods
*
* @return {@link CsvReader}
*/
public static CsvReader getReader() {
return new CsvReader();
}
/**
* Creates a CSV reader bound to the given Reader
*
* @param reader {@link Reader}
* @param config configuration, {@code null} selects the default configuration
* @return {@link CsvReader}
* @since 5.7.14
*/
public static CsvReader getReader(Reader reader, CsvReadConfig config) {
return new CsvReader(reader, config);
}
/**
* Creates a CSV reader bound to the given Reader, passing {@code null} as the configuration
*
* @param reader {@link Reader}
* @return {@link CsvReader}
* @since 5.7.14
*/
public static CsvReader getReader(Reader reader) {
return getReader(reader, null);
}
//----------------------------------------------------------------------------------------------------------- Writer
/**
* Creates a CSV writer for a file path. Per the original documentation it uses the default
* configuration and overwrites an existing file.
*
* @param filePath path of the CSV file
* @param charset charset
* @return {@link CsvWriter}
*/
public static CsvWriter getWriter(String filePath, Charset charset) {
return new CsvWriter(filePath, charset);
}
/**
* Creates a CSV writer for a file. Per the original documentation it uses the default
* configuration and overwrites an existing file.
*
* @param file CSV file
* @param charset charset
* @return {@link CsvWriter}
*/
public static CsvWriter getWriter(File file, Charset charset) {
return new CsvWriter(file, charset);
}
/**
* Creates a CSV writer for a file path without passing a write configuration
*
* @param filePath path of the CSV file
* @param charset charset
* @param isAppend whether to append
* @return {@link CsvWriter}
*/
public static CsvWriter getWriter(String filePath, Charset charset, boolean isAppend) {
return new CsvWriter(filePath, charset, isAppend);
}
/**
* Creates a CSV writer for a file without passing a write configuration
*
* @param file CSV file
* @param charset charset
* @param isAppend whether to append
* @return {@link CsvWriter}
*/
public static CsvWriter getWriter(File file, Charset charset, boolean isAppend) {
return new CsvWriter(file, charset, isAppend);
}
/**
* Creates a CSV writer for a file
*
* @param file CSV file
* @param charset charset
* @param isAppend whether to append
* @param config write configuration; per the original documentation {@code null} selects the default configuration
* @return {@link CsvWriter}
*/
public static CsvWriter getWriter(File file, Charset charset, boolean isAppend, CsvWriteConfig config) {
return new CsvWriter(file, charset, isAppend, config);
}
/**
* Creates a CSV writer on top of a Writer
*
* @param writer Writer
* @return {@link CsvWriter}
*/
public static CsvWriter getWriter(Writer writer) {
return new CsvWriter(writer);
}
/**
* Creates a CSV writer on top of a Writer
*
* @param writer Writer
* @param config write configuration; per the original documentation {@code null} selects the default configuration
* @return {@link CsvWriter}
*/
public static CsvWriter getWriter(Writer writer, CsvWriteConfig config) {
return new CsvWriter(writer, config);
}
}

View File

@@ -0,0 +1,54 @@
package cn.hutool.poi.csv;
import cn.hutool.core.util.CharUtil;
import java.io.Serializable;
/**
 * Configuration options for writing CSV data.
 *
 * @author looly
 */
public class CsvWriteConfig extends CsvConfig<CsvWriteConfig> implements Serializable {
	private static final long serialVersionUID = 5396453565371560052L;

	/**
	 * Whether every field is wrapped in the text delimiter. Defaults to {@code false},
	 * in which case the delimiter is added only where needed.
	 */
	protected boolean alwaysDelimitText;

	/**
	 * Line delimiter; defaults to {@code CharUtil.CR} followed by {@code CharUtil.LF}.
	 */
	protected char[] lineDelimiter = new char[]{CharUtil.CR, CharUtil.LF};

	/**
	 * Creates a configuration populated with the default values.
	 *
	 * @return a new default configuration
	 */
	public static CsvWriteConfig defaultConfig() {
		final CsvWriteConfig defaults = new CsvWriteConfig();
		return defaults;
	}

	/**
	 * Sets whether every field is wrapped in the text delimiter. With {@code false}
	 * (the default) the delimiter is added only where needed.
	 *
	 * @param alwaysDelimitText {@code true} to always wrap fields in the text delimiter
	 * @return this
	 */
	public CsvWriteConfig setAlwaysDelimitText(boolean alwaysDelimitText) {
		this.alwaysDelimitText = alwaysDelimitText;
		return this;
	}

	/**
	 * Sets the line delimiter. The array is stored as given, without copying.
	 *
	 * @param lineDelimiter characters that make up the line delimiter
	 * @return this
	 */
	public CsvWriteConfig setLineDelimiter(char[] lineDelimiter) {
		this.lineDelimiter = lineDelimiter;
		return this;
	}
}

View File

@@ -0,0 +1,451 @@
package cn.hutool.poi.csv;
import cn.hutool.core.bean.BeanUtil;
import cn.hutool.core.collection.ArrayIter;
import cn.hutool.core.collection.CollUtil;
import cn.hutool.core.convert.Convert;
import cn.hutool.core.io.FileUtil;
import cn.hutool.core.io.IORuntimeException;
import cn.hutool.core.io.IoUtil;
import cn.hutool.core.lang.Assert;
import cn.hutool.core.map.MapUtil;
import cn.hutool.core.util.ArrayUtil;
import cn.hutool.core.util.CharUtil;
import cn.hutool.core.util.CharsetUtil;
import cn.hutool.core.util.ObjectUtil;
import java.io.BufferedWriter;
import java.io.Closeable;
import java.io.File;
import java.io.Flushable;
import java.io.IOException;
import java.io.Serializable;
import java.io.Writer;
import java.nio.charset.Charset;
import java.util.List;
import java.util.Map;
/**
 * Writes CSV data to a {@link Writer}.
 * <p>
 * Fields are joined with the configured field separator and wrapped in the configured text
 * delimiter when required. Every data or comment line after the first one is preceded by the
 * configured line delimiter.
 *
 * @author Looly
 * @since 4.0.5
 */
public final class CsvWriter implements Closeable, Flushable, Serializable {
	private static final long serialVersionUID = 1L;

	/**
	 * Destination writer; always a {@link BufferedWriter} (wrapped by the constructor if necessary)
	 */
	private final Writer writer;
	/**
	 * Write configuration
	 */
	private final CsvWriteConfig config;
	/**
	 * Whether the next field is the first one of its line, i.e. no field separator precedes it
	 */
	private boolean newline = true;
	/**
	 * Whether no data or comment line has been written yet. Starts as {@code true} and becomes
	 * {@code false} once the first such line is written.
	 */
	private boolean isFirstLine = true;

	// --------------------------------------------------------------------------------------------------- Constructor start

	/**
	 * Constructor. Does not append (an existing file is overwritten); the charset is UTF-8.
	 *
	 * @param filePath path of the CSV file
	 */
	public CsvWriter(String filePath) {
		this(FileUtil.file(filePath));
	}

	/**
	 * Constructor. Does not append (an existing file is overwritten); the charset is UTF-8.
	 *
	 * @param file the CSV file
	 */
	public CsvWriter(File file) {
		this(file, CharsetUtil.CHARSET_UTF_8);
	}

	/**
	 * Constructor. Does not append (an existing file is overwritten).
	 *
	 * @param filePath path of the CSV file
	 * @param charset  charset of the file
	 */
	public CsvWriter(String filePath, Charset charset) {
		this(FileUtil.file(filePath), charset);
	}

	/**
	 * Constructor. Does not append (an existing file is overwritten).
	 *
	 * @param file    the CSV file
	 * @param charset charset of the file
	 */
	public CsvWriter(File file, Charset charset) {
		this(file, charset, false);
	}

	/**
	 * Constructor using the default configuration.
	 *
	 * @param filePath path of the CSV file
	 * @param charset  charset of the file
	 * @param isAppend {@code true} to append to the file
	 */
	public CsvWriter(String filePath, Charset charset, boolean isAppend) {
		this(FileUtil.file(filePath), charset, isAppend);
	}

	/**
	 * Constructor using the default configuration.
	 *
	 * @param file     the CSV file
	 * @param charset  charset of the file
	 * @param isAppend {@code true} to append to the file
	 */
	public CsvWriter(File file, Charset charset, boolean isAppend) {
		this(file, charset, isAppend, null);
	}

	/**
	 * Constructor.
	 *
	 * @param filePath path of the CSV file
	 * @param charset  charset of the file
	 * @param isAppend {@code true} to append to the file
	 * @param config   write configuration; {@code null} selects the default configuration
	 */
	public CsvWriter(String filePath, Charset charset, boolean isAppend, CsvWriteConfig config) {
		this(FileUtil.file(filePath), charset, isAppend, config);
	}

	/**
	 * Constructor.
	 *
	 * @param file     the CSV file
	 * @param charset  charset of the file
	 * @param isAppend {@code true} to append to the file
	 * @param config   write configuration; {@code null} selects the default configuration
	 */
	public CsvWriter(File file, Charset charset, boolean isAppend, CsvWriteConfig config) {
		this(FileUtil.getWriter(file, charset, isAppend), config);
	}

	/**
	 * Constructor using the default configuration.
	 *
	 * @param writer {@link Writer} to write to
	 */
	public CsvWriter(Writer writer) {
		this(writer, null);
	}

	/**
	 * Constructor.
	 *
	 * @param writer {@link Writer} to write to; wrapped in a {@link BufferedWriter} unless it already is one
	 * @param config write configuration; {@code null} selects the default configuration
	 */
	public CsvWriter(Writer writer, CsvWriteConfig config) {
		this.writer = (writer instanceof BufferedWriter) ? writer : new BufferedWriter(writer);
		this.config = ObjectUtil.defaultIfNull(config, CsvWriteConfig::defaultConfig);
	}
	// --------------------------------------------------------------------------------------------------- Constructor end

	/**
	 * Sets whether every field is wrapped in the text delimiter. With {@code false} the delimiter
	 * is added only where needed.
	 *
	 * @param alwaysDelimitText {@code true} to always wrap fields in the text delimiter
	 * @return this
	 */
	public CsvWriter setAlwaysDelimitText(boolean alwaysDelimitText) {
		this.config.setAlwaysDelimitText(alwaysDelimitText);
		return this;
	}

	/**
	 * Sets the line delimiter.
	 *
	 * @param lineDelimiter characters that make up the line delimiter
	 * @return this
	 */
	public CsvWriter setLineDelimiter(char[] lineDelimiter) {
		this.config.setLineDelimiter(lineDelimiter);
		return this;
	}

	/**
	 * Writes several lines to the writer.
	 *
	 * @param lines the lines, one {@code String[]} per line
	 * @return this
	 * @throws IORuntimeException on I/O failure
	 */
	public CsvWriter write(String[]... lines) throws IORuntimeException {
		return write(new ArrayIter<>(lines));
	}

	/**
	 * Writes several lines to the writer and flushes afterwards.
	 *
	 * @param lines the lines; each element (for example a collection or an array) is converted
	 *              to a {@code String[]} before being written
	 * @return this
	 * @throws IORuntimeException on I/O failure
	 */
	public CsvWriter write(Iterable<?> lines) throws IORuntimeException {
		if (CollUtil.isNotEmpty(lines)) {
			for (Object values : lines) {
				appendLine(Convert.toStrArray(values));
			}
			flush();
		}
		return this;
	}

	/**
	 * Writes a {@link CsvData} (header first, if any, then the rows) to the writer.
	 *
	 * @param csvData the data to write; {@code null} is ignored
	 * @return this
	 * @since 5.7.4
	 */
	public CsvWriter write(CsvData csvData) {
		if (csvData != null) {
			// 1. header
			final List<String> header = csvData.getHeader();
			if (CollUtil.isNotEmpty(header)) {
				this.writeHeaderLine(header.toArray(new String[0]));
			}
			// 2. content
			this.write(csvData.getRows());
			// write(Iterable) only flushes when there are rows, so flush here to cover a header-only write
			flush();
		}
		return this;
	}

	/**
	 * Writes a collection of beans to the writer. The header line is generated from the keys of
	 * the first bean's property map.
	 *
	 * @param beans the beans to write
	 * @return this
	 */
	public CsvWriter writeBeans(Iterable<?> beans) {
		if (CollUtil.isNotEmpty(beans)) {
			boolean isFirst = true;
			Map<String, Object> map;
			for (Object bean : beans) {
				map = BeanUtil.beanToMap(bean);
				if (isFirst) {
					writeHeaderLine(map.keySet().toArray(new String[0]));
					isFirst = false;
				}
				writeLine(Convert.toStrArray(map.values()));
			}
			flush();
		}
		return this;
	}

	/**
	 * Writes a header line, replacing each field that has a configured header alias by that alias.
	 * <p>
	 * The given array is never modified: alias replacement is done on a copy.
	 *
	 * @param fields the header fields ({@code null} values are written as empty fields)
	 * @return this
	 * @throws IORuntimeException on I/O failure
	 * @since 5.7.10
	 */
	public CsvWriter writeHeaderLine(String... fields) throws IORuntimeException {
		final Map<String, String> headerAlias = this.config.headerAlias;
		String[] headers = fields;
		if (null != fields && MapUtil.isNotEmpty(headerAlias)) {
			// A caller that passes an explicit String[] hands over its own array, so replace
			// aliases on a copy instead of mutating the caller's data.
			headers = fields.clone();
			String alias;
			for (int i = 0; i < headers.length; i++) {
				alias = headerAlias.get(headers[i]);
				if (null != alias) {
					headers[i] = alias;
				}
			}
		}
		return writeLine(headers);
	}

	/**
	 * Writes one line. An empty or {@code null} field array only writes a line delimiter.
	 *
	 * @param fields the fields ({@code null} values are written as empty fields)
	 * @return this
	 * @throws IORuntimeException on I/O failure
	 * @since 5.5.7
	 */
	public CsvWriter writeLine(String... fields) throws IORuntimeException {
		if (ArrayUtil.isEmpty(fields)) {
			return writeLine();
		}
		appendLine(fields);
		return this;
	}

	/**
	 * Writes a line delimiter (a line break).
	 *
	 * @return this
	 * @throws IORuntimeException on I/O failure
	 */
	public CsvWriter writeLine() throws IORuntimeException {
		try {
			writer.write(config.lineDelimiter);
		} catch (IOException e) {
			throw new IORuntimeException(e);
		}
		newline = true;
		return this;
	}

	/**
	 * Writes one comment line: the configured comment character followed by the comment text.<br>
	 * An exception is thrown if no comment character is configured.
	 *
	 * @param comment the comment text; {@code null} is written as an empty comment
	 * @return this
	 * @throws IORuntimeException on I/O failure
	 * @see CsvConfig#commentCharacter
	 * @since 5.5.7
	 */
	public CsvWriter writeComment(String comment) {
		Assert.notNull(this.config.commentCharacter, "Comment is disable!");
		try {
			if (isFirstLine) {
				// no line delimiter in front of the very first line
				isFirstLine = false;
			} else {
				writer.write(config.lineDelimiter);
			}
			writer.write(this.config.commentCharacter);
			if (null != comment) {
				// Writer.write(String) throws NullPointerException for null
				writer.write(comment);
			}
			newline = true;
		} catch (IOException e) {
			throw new IORuntimeException(e);
		}
		return this;
	}

	/**
	 * Closes the underlying writer.
	 */
	@Override
	public void close() {
		IoUtil.close(this.writer);
	}

	/**
	 * Flushes the underlying writer.
	 *
	 * @throws IORuntimeException on I/O failure
	 */
	@Override
	public void flush() throws IORuntimeException {
		try {
			writer.flush();
		} catch (IOException e) {
			throw new IORuntimeException(e);
		}
	}

	// --------------------------------------------------------------------------------------------------- Private method start

	/**
	 * Appends one line, wrapping any {@link IOException} in an {@link IORuntimeException}.
	 *
	 * @param fields the fields ({@code null} values are written as empty fields)
	 * @throws IORuntimeException on I/O failure
	 * @see #doAppendLine(String...)
	 */
	private void appendLine(String... fields) throws IORuntimeException {
		try {
			doAppendLine(fields);
		} catch (IOException e) {
			throw new IORuntimeException(e);
		}
	}

	/**
	 * Appends one line. A line delimiter is written in front of the line unless it is the first
	 * line; nothing is written after the last field. A {@code null} array is ignored.
	 *
	 * @param fields the fields ({@code null} values are written as empty fields)
	 * @throws IOException on I/O failure
	 */
	private void doAppendLine(String... fields) throws IOException {
		if (null != fields) {
			if (isFirstLine) {
				// no line delimiter in front of the very first line
				isFirstLine = false;
			} else {
				writer.write(config.lineDelimiter);
			}
			for (String field : fields) {
				appendField(field);
			}
			newline = true;
		}
	}

	/**
	 * Appends a field to the current line. A field separator is written first unless this is the
	 * first field of the line, and the value is wrapped in text delimiters when necessary.
	 *
	 * @param value the field value; {@code null} is written as an empty field
	 * @throws IOException on I/O failure
	 */
	private void appendField(final String value) throws IOException {
		boolean alwaysDelimitText = config.alwaysDelimitText;
		char textDelimiter = config.textDelimiter;
		char fieldSeparator = config.fieldSeparator;

		if (false == newline) {
			writer.write(fieldSeparator);
		} else {
			newline = false;
		}

		if (null == value) {
			if (alwaysDelimitText) {
				// empty field, still wrapped
				writer.write(new char[]{textDelimiter, textDelimiter});
			}
			return;
		}

		final char[] valueChars = value.toCharArray();
		boolean needsTextDelimiter = alwaysDelimitText;
		boolean containsTextDelimiter = false;

		for (final char c : valueChars) {
			if (c == textDelimiter) {
				// the value contains the text delimiter: it must be wrapped and escaped
				containsTextDelimiter = needsTextDelimiter = true;
				break;
			} else if (c == fieldSeparator || c == CharUtil.LF || c == CharUtil.CR) {
				// the value contains a separator or a line break: it must be wrapped
				needsTextDelimiter = true;
			}
		}

		// opening text delimiter
		if (needsTextDelimiter) {
			writer.write(textDelimiter);
		}

		// content
		if (containsTextDelimiter) {
			for (final char c : valueChars) {
				// escape a text delimiter by doubling it
				if (c == textDelimiter) {
					writer.write(textDelimiter);
				}
				writer.write(c);
			}
		} else {
			writer.write(valueChars);
		}

		// closing text delimiter
		if (needsTextDelimiter) {
			writer.write(textDelimiter);
		}
	}
	// --------------------------------------------------------------------------------------------------- Private method end
}

View File

@@ -0,0 +1,8 @@
/**
 * Wrappers for reading and writing CSV files; the entry point is CsvUtil.<br>
 * Specification: https://datatracker.ietf.org/doc/html/rfc4180
 *
 * @author looly
 *
 */
package cn.hutool.poi.csv;

View File

@@ -0,0 +1,63 @@
package cn.hutool.poi.csv;
import cn.hutool.core.io.IoUtil;
import cn.hutool.core.util.StrUtil;
import org.junit.Assert;
import org.junit.Test;
import java.io.StringReader;
/**
 * Tests for {@link CsvParser}: each test parses a single line of CSV text and checks one field.
 */
public class CsvParserTest {

	/**
	 * A text delimiter in the middle of an unwrapped field is kept as part of the value.
	 */
	@Test
	public void parseTest1() {
		StringReader reader = StrUtil.getReader("aaa,b\"bba\",ccc");
		CsvParser parser = new CsvParser(reader, null);
		CsvRow row = parser.nextRow();
		//noinspection ConstantConditions
		Assert.assertEquals("b\"bba\"", row.getRawList().get(1));
		IoUtil.close(parser);
	}

	/**
	 * A wrapped section followed by more text is kept verbatim, delimiters included.
	 */
	@Test
	public void parseTest2() {
		StringReader reader = StrUtil.getReader("aaa,\"bba\"bbb,ccc");
		CsvParser parser = new CsvParser(reader, null);
		CsvRow row = parser.nextRow();
		//noinspection ConstantConditions
		Assert.assertEquals("\"bba\"bbb", row.getRawList().get(1));
		IoUtil.close(parser);
	}

	/**
	 * A fully wrapped field is returned without its text delimiters.
	 */
	@Test
	public void parseTest3() {
		StringReader reader = StrUtil.getReader("aaa,\"bba\",ccc");
		CsvParser parser = new CsvParser(reader, null);
		CsvRow row = parser.nextRow();
		//noinspection ConstantConditions
		Assert.assertEquals("bba", row.getRawList().get(1));
		IoUtil.close(parser);
	}

	/**
	 * A wrapped empty field is returned as the empty string.
	 */
	@Test
	public void parseTest4() {
		StringReader reader = StrUtil.getReader("aaa,\"\",ccc");
		CsvParser parser = new CsvParser(reader, null);
		CsvRow row = parser.nextRow();
		//noinspection ConstantConditions
		Assert.assertEquals("", row.getRawList().get(1));
		IoUtil.close(parser);
	}

	/**
	 * A doubled text delimiter inside a wrapped field is unescaped to a single one.
	 */
	@Test
	public void parseEscapeTest() {
		// https://datatracker.ietf.org/doc/html/rfc4180#section-2
		// rule 7
		StringReader reader = StrUtil.getReader("\"b\"\"bb\"");
		CsvParser parser = new CsvParser(reader, null);
		CsvRow row = parser.nextRow();
		Assert.assertNotNull(row);
		Assert.assertEquals(1, row.size());
		Assert.assertEquals("b\"bb", row.get(0));
		// close the parser like the other tests do
		IoUtil.close(parser);
	}
}

View File

@@ -0,0 +1,205 @@
package cn.hutool.poi.csv;
import cn.hutool.core.annotation.Alias;
import cn.hutool.core.collection.CollUtil;
import cn.hutool.core.io.FileUtil;
import cn.hutool.core.io.resource.ResourceUtil;
import cn.hutool.core.lang.Console;
import cn.hutool.core.util.CharsetUtil;
import lombok.Data;
import org.junit.Assert;
import org.junit.Test;
import org.junit.Ignore;
import java.util.List;
import java.util.Map;
/**
 * Tests for {@link CsvReader}, reading from classpath resources and from strings.
 */
public class CsvReaderTest {

	/**
	 * Bean populated by {@link #readBeanListTest()}; the {@code name} property is bound through an alias.
	 */
	@Data
	private static class TestBean {
		@Alias("姓名")
		private String name;
		private String gender;
		private String focus;
		private Integer age;
	}

	@Test
	public void readTest() {
		final CsvReader csvReader = new CsvReader();
		final CsvData csvData = csvReader.read(ResourceUtil.getReader("test.csv", CharsetUtil.CHARSET_UTF_8));

		final CsvRow firstRow = csvData.getRow(0);
		Assert.assertEquals("sss,sss", firstRow.get(0));
		Assert.assertEquals(1, firstRow.getOriginalLineNumber());
		Assert.assertEquals("性别", firstRow.get(2));
		Assert.assertEquals("关注\"对象\"", firstRow.get(3));
	}

	@Test
	public void readMapListTest() {
		final CsvReader csvReader = CsvUtil.getReader();
		final List<Map<String, String>> maps = csvReader.readMapList(
				ResourceUtil.getUtf8Reader("test_bean.csv"));

		final Map<String, String> first = maps.get(0);
		Assert.assertEquals("张三", first.get("姓名"));
		Assert.assertEquals("", first.get("gender"));
		Assert.assertEquals("", first.get("focus"));
		Assert.assertEquals("33", first.get("age"));

		final Map<String, String> second = maps.get(1);
		Assert.assertEquals("李四", second.get("姓名"));
		Assert.assertEquals("", second.get("gender"));
		Assert.assertEquals("好对象", second.get("focus"));
		Assert.assertEquals("23", second.get("age"));

		final Map<String, String> third = maps.get(2);
		Assert.assertEquals("王妹妹", third.get("姓名"));
		Assert.assertEquals("", third.get("gender"));
		Assert.assertEquals("特别关注", third.get("focus"));
		Assert.assertEquals("22", third.get("age"));
	}

	@Test
	public void readAliasMapListTest() {
		final CsvReadConfig readConfig = CsvReadConfig.defaultConfig();
		readConfig.addHeaderAlias("姓名", "name");

		final CsvReader csvReader = CsvUtil.getReader(readConfig);
		final List<Map<String, String>> maps = csvReader.readMapList(
				ResourceUtil.getUtf8Reader("test_bean.csv"));

		final Map<String, String> first = maps.get(0);
		Assert.assertEquals("张三", first.get("name"));
		Assert.assertEquals("", first.get("gender"));
		Assert.assertEquals("", first.get("focus"));
		Assert.assertEquals("33", first.get("age"));

		final Map<String, String> second = maps.get(1);
		Assert.assertEquals("李四", second.get("name"));
		Assert.assertEquals("", second.get("gender"));
		Assert.assertEquals("好对象", second.get("focus"));
		Assert.assertEquals("23", second.get("age"));

		final Map<String, String> third = maps.get(2);
		Assert.assertEquals("王妹妹", third.get("name"));
		Assert.assertEquals("", third.get("gender"));
		Assert.assertEquals("特别关注", third.get("focus"));
		Assert.assertEquals("22", third.get("age"));
	}

	@Test
	public void readBeanListTest() {
		final CsvReader csvReader = CsvUtil.getReader();
		final List<TestBean> beans = csvReader.read(
				ResourceUtil.getUtf8Reader("test_bean.csv"), TestBean.class);

		final TestBean first = beans.get(0);
		Assert.assertEquals("张三", first.getName());
		Assert.assertEquals("", first.getGender());
		Assert.assertEquals("", first.getFocus());
		Assert.assertEquals(Integer.valueOf(33), first.getAge());

		final TestBean second = beans.get(1);
		Assert.assertEquals("李四", second.getName());
		Assert.assertEquals("", second.getGender());
		Assert.assertEquals("好对象", second.getFocus());
		Assert.assertEquals(Integer.valueOf(23), second.getAge());

		final TestBean third = beans.get(2);
		Assert.assertEquals("王妹妹", third.getName());
		Assert.assertEquals("", third.getGender());
		Assert.assertEquals("特别关注", third.getFocus());
		Assert.assertEquals(Integer.valueOf(22), third.getAge());
	}

	@Test
	@Ignore
	public void readTest2() {
		final CsvReader csvReader = CsvUtil.getReader();
		final CsvData csvData = csvReader.read(FileUtil.file("d:/test/test.csv"));
		for (CsvRow csvRow : csvData) {
			Console.log(csvRow);
		}
	}

	@Test
	@Ignore
	public void readTest3() {
		final CsvReadConfig readConfig = CsvReadConfig.defaultConfig();
		readConfig.setContainsHeader(true);

		final CsvReader csvReader = CsvUtil.getReader(readConfig);
		final CsvData csvData = csvReader.read(FileUtil.file("d:/test/ceshi.csv"));
		for (CsvRow csvRow : csvData) {
			Console.log(csvRow.getByName("案件ID"));
		}
	}

	@Test
	public void lineNoTest() {
		final CsvReader csvReader = new CsvReader();
		final CsvData csvData = csvReader.read(ResourceUtil.getReader("test_lines.csv", CharsetUtil.CHARSET_UTF_8));

		final CsvRow row0 = csvData.getRow(0);
		Assert.assertEquals(1, row0.getOriginalLineNumber());
		Assert.assertEquals("a,b,c,d", CollUtil.join(row0, ","));

		final CsvRow row2 = csvData.getRow(2);
		Assert.assertEquals(4, row2.getOriginalLineNumber());
		Assert.assertEquals("q,w,e,r,我是一段\n带换行的内容",
				CollUtil.join(row2, ",").replace("\r", ""));

		// this data row's original line number in the file is 6 (line numbers start at 0)
		final CsvRow row3 = csvData.getRow(3);
		Assert.assertEquals(6, row3.getOriginalLineNumber());
		Assert.assertEquals("a,s,d,f", CollUtil.join(row3, ","));
	}

	@Test
	public void lineLimitTest() {
		// start reading at original line 2
		final CsvReadConfig readConfig = CsvReadConfig.defaultConfig().setBeginLineNo(2);
		final CsvReader csvReader = new CsvReader(readConfig);
		final CsvData csvData = csvReader.read(ResourceUtil.getReader("test_lines.csv", CharsetUtil.CHARSET_UTF_8));

		final CsvRow row0 = csvData.getRow(0);
		Assert.assertEquals(2, row0.getOriginalLineNumber());
		Assert.assertEquals("1,2,3,4", CollUtil.join(row0, ","));

		final CsvRow row1 = csvData.getRow(1);
		Assert.assertEquals(4, row1.getOriginalLineNumber());
		Assert.assertEquals("q,w,e,r,我是一段\n带换行的内容",
				CollUtil.join(row1, ",").replace("\r", ""));

		// this data row's original line number in the file is 6 (line numbers start at 0)
		final CsvRow row2 = csvData.getRow(2);
		Assert.assertEquals(6, row2.getOriginalLineNumber());
		Assert.assertEquals("a,s,d,f", CollUtil.join(row2, ","));
	}

	@Test
	public void lineLimitWithHeaderTest() {
		// start reading at original line 2
		final CsvReadConfig readConfig = CsvReadConfig.defaultConfig().setBeginLineNo(2).setContainsHeader(true);
		final CsvReader csvReader = new CsvReader(readConfig);
		final CsvData csvData = csvReader.read(ResourceUtil.getReader("test_lines.csv", CharsetUtil.CHARSET_UTF_8));

		final CsvRow row0 = csvData.getRow(0);
		Assert.assertEquals(4, row0.getOriginalLineNumber());
		Assert.assertEquals("q,w,e,r,我是一段\n带换行的内容",
				CollUtil.join(row0, ",").replace("\r", ""));

		// this data row's original line number in the file is 6 (line numbers start at 0)
		final CsvRow row1 = csvData.getRow(1);
		Assert.assertEquals(6, row1.getOriginalLineNumber());
		Assert.assertEquals("a,s,d,f", CollUtil.join(row1, ","));
	}

	@Test
	public void customConfigTest() {
		final CsvReadConfig readConfig = CsvReadConfig.defaultConfig()
				.setTextDelimiter('\'')
				.setFieldSeparator(';');
		final CsvReader csvReader = CsvUtil.getReader(readConfig);

		final CsvData csvData = csvReader.readFromStr("123;456;'789;0'abc;");
		final CsvRow firstRow = csvData.getRow(0);
		Assert.assertEquals("123", firstRow.get(0));
		Assert.assertEquals("456", firstRow.get(1));
		Assert.assertEquals("'789;0'abc", firstRow.get(2));
	}

	@Test
	public void readDisableCommentTest() {
		final CsvReadConfig readConfig = CsvReadConfig.defaultConfig().disableComment();
		final CsvReader csvReader = CsvUtil.getReader(readConfig);

		final CsvData csvData = csvReader.read(ResourceUtil.getUtf8Reader("test.csv"));
		final CsvRow firstRow = csvData.getRow(0);
		Assert.assertEquals("# 这是一行注释,读取时应忽略", firstRow.get(0));
	}

	@Test
	@Ignore
	public void streamTest() {
		final CsvReader csvReader = CsvUtil.getReader(ResourceUtil.getUtf8Reader("test_bean.csv"));
		csvReader.stream().limit(2).forEach(Console::log);
	}
}

View File

@@ -0,0 +1,207 @@
package cn.hutool.poi.csv;
import cn.hutool.core.bean.BeanUtil;
import cn.hutool.core.collection.ListUtil;
import cn.hutool.core.io.FileUtil;
import cn.hutool.core.lang.Console;
import cn.hutool.core.util.CharsetUtil;
import lombok.AllArgsConstructor;
import lombok.Data;
import org.junit.Assert;
import org.junit.Ignore;
import org.junit.Test;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
/**
 * Tests for {@link CsvUtil}. The write tests target fixed local paths and are therefore ignored
 * by default; each of them closes its writer via try-with-resources.
 */
public class CsvUtilTest {

	@Test
	public void readTest() {
		CsvReader reader = CsvUtil.getReader();
		// read the CSV data from a file
		CsvData data = reader.read(FileUtil.file("test.csv"));
		List<CsvRow> rows = data.getRows();
		final CsvRow row0 = rows.get(0);
		Assert.assertEquals("sss,sss", row0.get(0));
		Assert.assertEquals("姓名", row0.get(1));
		Assert.assertEquals("性别", row0.get(2));
		Assert.assertEquals("关注\"对象\"", row0.get(3));
		Assert.assertEquals("年龄", row0.get(4));
		Assert.assertEquals("", row0.get(5));
		Assert.assertEquals("\"", row0.get(6));
	}

	@Test
	public void readTest2() {
		CsvReader reader = CsvUtil.getReader();
		reader.read(FileUtil.getUtf8Reader("test.csv"), (csvRow) -> {
			// there is only one row, so assert on it directly
			Assert.assertEquals("sss,sss", csvRow.get(0));
			Assert.assertEquals("姓名", csvRow.get(1));
			Assert.assertEquals("性别", csvRow.get(2));
			Assert.assertEquals("关注\"对象\"", csvRow.get(3));
			Assert.assertEquals("年龄", csvRow.get(4));
			Assert.assertEquals("", csvRow.get(5));
			Assert.assertEquals("\"", csvRow.get(6));
		});
	}

	@Test
	@Ignore
	public void readTest3() {
		CsvReader reader = CsvUtil.getReader();
		String path = FileUtil.isWindows() ? "d:/test/test.csv" : "~/test/test.csv";
		reader.read(FileUtil.getUtf8Reader(path), Console::log);
	}

	@Test
	public void readCsvStr1() {
		CsvData data = CsvUtil.getReader().readFromStr("# 这是一行注释,读取时应忽略\n" +
				"\"sss,sss\",姓名,\"性别\",关注\"对象\",年龄,\"\",\"\"\"\n");
		List<CsvRow> rows = data.getRows();
		final CsvRow row0 = rows.get(0);
		Assert.assertEquals("sss,sss", row0.get(0));
		Assert.assertEquals("姓名", row0.get(1));
		Assert.assertEquals("性别", row0.get(2));
		Assert.assertEquals("关注\"对象\"", row0.get(3));
		Assert.assertEquals("年龄", row0.get(4));
		Assert.assertEquals("", row0.get(5));
		Assert.assertEquals("\"", row0.get(6));
	}

	@Test
	public void readCsvStr2() {
		CsvUtil.getReader().readFromStr("# 这是一行注释,读取时应忽略\n" +
				"\"sss,sss\",姓名,\"性别\",关注\"对象\",年龄,\"\",\"\"\"\n", (csvRow) -> {
			// there is only one row, so assert on it directly
			Assert.assertEquals("sss,sss", csvRow.get(0));
			Assert.assertEquals("姓名", csvRow.get(1));
			Assert.assertEquals("性别", csvRow.get(2));
			Assert.assertEquals("关注\"对象\"", csvRow.get(3));
			Assert.assertEquals("年龄", csvRow.get(4));
			Assert.assertEquals("", csvRow.get(5));
			Assert.assertEquals("\"", csvRow.get(6));
		});
	}

	@Test
	@Ignore
	public void writeTest() {
		String path = FileUtil.isWindows() ? "d:/test/testWrite.csv" : "~/test/testWrite.csv";
		// try-with-resources: the writer is closed even if writing fails
		try (CsvWriter writer = CsvUtil.getWriter(path, CharsetUtil.CHARSET_UTF_8)) {
			writer.write(
					new String[]{"a1", "b1", "c1", "123345346456745756756785656"},
					new String[]{"a2", "b2", "c2"},
					new String[]{"a3", "b3", "c3"}
			);
		}
	}

	@Test
	@Ignore
	public void writeBeansTest() {

		@Data
		class Student {
			Integer id;
			String name;
			Integer age;
		}

		List<Student> students = new ArrayList<>();
		Student student1 = new Student();
		student1.setId(1);
		student1.setName("张三");
		student1.setAge(18);

		Student student2 = new Student();
		student2.setId(2);
		student2.setName("李四");
		student2.setAge(22);

		Student student3 = new Student();
		student3.setId(3);
		student3.setName("王五");
		student3.setAge(31);

		students.add(student1);
		students.add(student2);
		students.add(student3);

		String path = FileUtil.isWindows() ? "d:/test/testWriteBeans.csv" : "~/test/testWriteBeans.csv";
		// try-with-resources: the writer is closed even if writing fails
		try (CsvWriter writer = CsvUtil.getWriter(path, CharsetUtil.CHARSET_UTF_8)) {
			writer.writeBeans(students);
		}
	}

	@Test
	@Ignore
	public void readLfTest() {
		final CsvReader reader = CsvUtil.getReader();
		String path = FileUtil.isWindows() ? "d:/test/rw_test.csv" : "~/test/rw_test.csv";
		final CsvData read = reader.read(FileUtil.file(path));
		for (CsvRow row : read) {
			Console.log(row);
		}
	}

	@Test
	@Ignore
	public void writeWrapTest() {
		List<List<Object>> resultList = new ArrayList<>();
		List<Object> list = new ArrayList<>();
		list.add("\"name\"");
		list.add("\"code\"");
		resultList.add(list);

		list = new ArrayList<>();
		list.add("\"wang\"");
		list.add(1);
		resultList.add(list);

		String path = FileUtil.isWindows() ? "d:/test/csvWrapTest.csv" : "~/test/csvWrapTest.csv";
		// try-with-resources: the writer is closed even if writing fails
		try (CsvWriter writer = CsvUtil.getWriter(path, CharsetUtil.CHARSET_UTF_8)) {
			writer.write(resultList);
		}
	}

	@Test
	@Ignore
	public void writeDataTest() {
		@Data
		@AllArgsConstructor
		class User {
			Integer userId;
			String username;
			String mobile;
		}

		List<String> header = ListUtil.of("用户id", "用户名", "手机号");
		List<CsvRow> row = new ArrayList<>();

		List<User> datas = new ArrayList<>();
		datas.add(new User(1, "张三", "18800001111"));
		datas.add(new User(2, "李四", "18800001112"));
		datas.add(new User(3, "王五", "18800001113"));
		datas.add(new User(4, "赵六", "18800001114"));

		// may be null
		//Map<String, Integer> headMap = null;
		Map<String, Integer> headMap = new HashMap<>();
		headMap.put("userId", 0);
		headMap.put("username", 1);
		headMap.put("mobile", 2);

		for (User user : datas) {
			// row.size() + 1: data starts at line 2 because line 1 is the header
			row.add(new CsvRow(row.size() + 1, headMap,
					BeanUtil.beanToMap(user).values().stream().map(Object::toString).collect(Collectors.toList())));
		}

		CsvData csvData = new CsvData(header, row);
		String path = FileUtil.isWindows() ? "d:/test/csvWriteDataTest.csv" : "~/test/csvWriteDataTest.csv";
		// try-with-resources: the writer is closed even if writing fails
		try (CsvWriter writer = CsvUtil.getWriter(path, CharsetUtil.CHARSET_UTF_8)) {
			writer.write(csvData);
		}
	}
}

View File

@@ -0,0 +1,47 @@
package cn.hutool.poi.csv;
import cn.hutool.core.io.FileUtil;
import cn.hutool.core.lang.Console;
import cn.hutool.core.util.CharsetUtil;
import org.junit.Ignore;
import org.junit.Test;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
/**
 * Manual tests for {@link CsvWriter}. Both write to fixed local paths and are therefore ignored
 * by default; each closes its writer via try-with-resources.
 */
public class CsvWriterTest {

	/**
	 * Writes a header line with aliases configured for two of its three fields, followed by two data lines.
	 */
	@Test
	@Ignore
	public void writeWithAliasTest() {
		final CsvWriteConfig csvWriteConfig = CsvWriteConfig.defaultConfig()
				.addHeaderAlias("name", "姓名")
				.addHeaderAlias("gender", "性别");

		// try-with-resources: the writer is closed even if a write fails
		try (CsvWriter writer = CsvUtil.getWriter(
				FileUtil.file("d:/test/csvAliasTest.csv"),
				CharsetUtil.CHARSET_GBK, false, csvWriteConfig)) {
			writer.writeHeaderLine("name", "gender", "address");
			writer.writeLine("张三", "", "XX市XX区");
			writer.writeLine("李四", "", "XX市XX区,01号");
		}
	}

	/**
	 * Writes 10000 single-field lines one at a time to a randomly named file.
	 */
	@Test
	@Ignore
	public void issue2255Test() {
		String fileName = "D:/test/" + new Random().nextInt(100) + "-a.csv";
		List<String> list = new ArrayList<>();
		for (int i = 0; i < 10000; i++) {
			list.add(i + "");
		}
		Console.log("{} : {}", fileName, list.size());

		// try-with-resources: the writer is closed even if a write fails
		try (CsvWriter writer = CsvUtil.getWriter(fileName, CharsetUtil.CHARSET_UTF_8)) {
			for (String s : list) {
				writer.writeLine(s);
			}
		}
	}
}