修复双引号转义符转义错误问题,修改规则后,对非闭合双引号字段的策略变更,如"aa,则被识别为aa

This commit is contained in:
Looly
2024-11-24 00:18:33 +08:00
parent 755aed01de
commit c50625e215
9 changed files with 124 additions and 547 deletions

View File

@@ -17,7 +17,6 @@
package org.dromara.hutool.poi.csv;
import org.dromara.hutool.core.io.IoUtil;
import org.dromara.hutool.core.lang.Console;
import org.dromara.hutool.core.text.StrUtil;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
@@ -29,7 +28,7 @@ public class CsvParserTest {
@Test
public void parseTest1() {
final StringReader reader = StrUtil.getReader("aaa,b\"bba\",ccc");
final CsvParser2 parser = new CsvParser2(reader, null);
final CsvParser parser = new CsvParser(reader, null);
final CsvRow row = parser.nextRow();
//noinspection ConstantConditions
Assertions.assertEquals("b\"bba\"", row.getRaw().get(1));
@@ -39,7 +38,7 @@ public class CsvParserTest {
@Test
public void parseTest2() {
final StringReader reader = StrUtil.getReader("aaa,\"bba\"bbb,ccc");
final CsvParser2 parser = new CsvParser2(reader, null);
final CsvParser parser = new CsvParser(reader, null);
final CsvRow row = parser.nextRow();
//noinspection ConstantConditions
Assertions.assertEquals("\"bba\"bbb", row.getRaw().get(1));
@@ -49,7 +48,7 @@ public class CsvParserTest {
@Test
public void parseTest3() {
final StringReader reader = StrUtil.getReader("aaa,\"bba\",ccc");
final CsvParser2 parser = new CsvParser2(reader, null);
final CsvParser parser = new CsvParser(reader, null);
final CsvRow row = parser.nextRow();
//noinspection ConstantConditions
Assertions.assertEquals("bba", row.getRaw().get(1));
@@ -59,7 +58,7 @@ public class CsvParserTest {
@Test
public void parseTest4() {
final StringReader reader = StrUtil.getReader("aaa,\"\",ccc");
final CsvParser2 parser = new CsvParser2(reader, null);
final CsvParser parser = new CsvParser(reader, null);
final CsvRow row = parser.nextRow();
//noinspection ConstantConditions
Assertions.assertEquals("", row.getRaw().get(1));
@@ -80,9 +79,36 @@ public class CsvParserTest {
@Test
void issueIB5UQ8Test() {
String csv = "\"Consultancy, 10\"\",, food\"";
final String csv = "\"Consultancy, 10\"\",, food\"";
final CsvReader reader = CsvUtil.getReader(new StringReader(csv));
final String s = reader.read().getRow(0).get(0);
Console.log(s);
Assertions.assertEquals("Consultancy, 10\",, food", s);
}
/**
 * Checks a quoted field whose closing text delimiter is the very last
 * character of the input (there is no trailing line break).
 * The wrapping quotes are expected to be removed, and the comma inside them
 * is expected to stay part of the field instead of splitting it.
 */
@Test
void textDelimiterAtEndTest() {
// Raw CSV text: "Consultancy, 10"
final String csv = "\"Consultancy, 10\"";
final CsvReader reader = CsvUtil.getReader(new StringReader(csv));
// First field of the first row
final String s = reader.read().getRow(0).get(0);
Assertions.assertEquals("Consultancy, 10", s);
}
/**
 * Checks a field that opens with a text delimiter which is never closed.
 * The expected value drops the opening quote and keeps the trailing comma
 * as field content.
 */
@Test
void textDelimiterUncloseTest() {
// Unclosed text delimiter: the field is expected to end automatically at the end of the input
final String csv = "\"Consultancy,";
final CsvReader reader = CsvUtil.getReader(new StringReader(csv));
// First field of the first row
final String s = reader.read().getRow(0).get(0);
Assertions.assertEquals("Consultancy,", s);
}
/**
 * Checks an input made of exactly three text delimiter characters.
 * The parser is expected to return a row whose first field is a single
 * double quote.
 */
@Test
void textDelimiterOfCount3Test() {
// Unclosed text delimiter: the field is expected to end automatically at the end of the input.
// NOTE(review): presumably read as an opening delimiter followed by an escaped quote ("") — confirm against CsvParser
final String csv = "\"\"\"";
// Empty-row skipping is switched off for this parse
final CsvParser csvParser = new CsvParser(new StringReader(csv), CsvReadConfig.of().setSkipEmptyRows(false));
final CsvRow row = csvParser.nextRow();
Assertions.assertNotNull(row);
Assertions.assertEquals("\"", row.get(0));
}
}

View File

@@ -68,7 +68,7 @@ public class CsvReaderTest {
@Test
public void readAliasMapListTest() {
final CsvReadConfig csvReadConfig = CsvReadConfig.defaultConfig();
final CsvReadConfig csvReadConfig = CsvReadConfig.of();
csvReadConfig.addHeaderAlias("姓名", "name");
final CsvReader reader = CsvUtil.getReader(csvReadConfig);
@@ -135,7 +135,7 @@ public class CsvReaderTest {
@Test
@Disabled
public void readTest3() {
final CsvReadConfig csvReadConfig = CsvReadConfig.defaultConfig();
final CsvReadConfig csvReadConfig = CsvReadConfig.of();
csvReadConfig.setContainsHeader(true);
final CsvReader reader = CsvUtil.getReader(csvReadConfig);
final CsvData read = reader.read(FileUtil.file("d:/test/ceshi.csv"));
@@ -164,7 +164,7 @@ public class CsvReaderTest {
@Test
public void lineLimitTest() {
// 从原始第2行开始读取
final CsvReader reader = new CsvReader(CsvReadConfig.defaultConfig().setBeginLineNo(2));
final CsvReader reader = new CsvReader(CsvReadConfig.of().setBeginLineNo(2));
final CsvData data = reader.read(
ResourceUtil.getUtf8Reader("test_lines.csv"), true);
@@ -183,7 +183,7 @@ public class CsvReaderTest {
@Test
public void lineLimitWithHeaderTest() {
// 从原始第2行开始读取
final CsvReader reader = new CsvReader(CsvReadConfig.defaultConfig().setBeginLineNo(2).setContainsHeader(true));
final CsvReader reader = new CsvReader(CsvReadConfig.of().setBeginLineNo(2).setContainsHeader(true));
final CsvData data = reader.read(
ResourceUtil.getUtf8Reader("test_lines.csv"), true);
@@ -199,7 +199,7 @@ public class CsvReaderTest {
@Test
public void customConfigTest() {
final CsvReader reader = CsvUtil.getReader(
CsvReadConfig.defaultConfig()
CsvReadConfig.of()
.setTextDelimiter('\'')
.setFieldSeparator(';'));
final CsvData csvRows = reader.readFromStr("123;456;'789;0'abc;");
@@ -211,7 +211,7 @@ public class CsvReaderTest {
@Test
public void readDisableCommentTest() {
final CsvReader reader = CsvUtil.getReader(CsvReadConfig.defaultConfig().disableComment());
final CsvReader reader = CsvUtil.getReader(CsvReadConfig.of().disableComment());
final CsvData read = reader.read(
ResourceUtil.getUtf8Reader("test.csv"), true);
final CsvRow row = read.getRow(0);

View File

@@ -48,11 +48,12 @@ public class CsvUtilTest {
Assertions.assertEquals("关注\"对象\"", row0.get(3));
Assertions.assertEquals("年龄", row0.get(4));
Assertions.assertEquals("", row0.get(5));
Assertions.assertEquals("\"", row0.get(6));
// 由于"""未闭合包装,因此末尾的换行符被当作包装内的内容,相当于:"""\n",转义后就是"\n
Assertions.assertEquals("\"\n", row0.get(6));
}
@Test
public void readTest2() {
public void readUseConsumerTest() {
final CsvReader reader = CsvUtil.getReader();
reader.read(FileUtil.getUtf8Reader("test.csv"), true, (csvRow)-> {
// 只有一行,所以直接判断
@@ -62,7 +63,8 @@ public class CsvUtilTest {
Assertions.assertEquals("关注\"对象\"", csvRow.get(3));
Assertions.assertEquals("年龄", csvRow.get(4));
Assertions.assertEquals("", csvRow.get(5));
Assertions.assertEquals("\"", csvRow.get(6));
// 由于"""未闭合包装,因此末尾的换行符被当作包装内的内容,相当于:"""\n",转义后就是"\n
Assertions.assertEquals("\"\n", csvRow.get(6));
});
}
@@ -75,7 +77,7 @@ public class CsvUtilTest {
}
@Test
public void readCsvStr1(){
public void readCsvStr1WithUncloseTest(){
final CsvData data = CsvUtil.getReader().readFromStr("# 这是一行注释,读取时应忽略\n" +
"\"sss,sss\",姓名,\"性别\",关注\"对象\",年龄,\"\",\"\"\"\n");
final List<CsvRow> rows = data.getRows();
@@ -86,11 +88,28 @@ public class CsvUtilTest {
Assertions.assertEquals("关注\"对象\"", row0.get(3));
Assertions.assertEquals("年龄", row0.get(4));
Assertions.assertEquals("", row0.get(5));
// 由于"""未闭合包装,因此末尾的换行符被当作包装内的内容,相当于:"""\n",转义后就是"\n
Assertions.assertEquals("\"\n", row0.get(6));
}
/**
 * Reads a CSV string made of a '#' line followed by one data line, with
 * field trimming enabled on the read config.
 * The data line ends with three text delimiter characters and a line break.
 * With trimming on, the last field is expected to be a bare double quote;
 * readCsvStr1WithUncloseTest reads the same text without trimming and
 * expects the line break to be kept in that field.
 */
@Test
public void readCsvStr1WithUncloseTrimTest(){
final CsvData data = CsvUtil.getReader(CsvReadConfig.of().setTrimField(true))
.readFromStr("# 这是一行注释,读取时应忽略\n" +
"\"sss,sss\",姓名,\"性别\",关注\"对象\",年龄,\"\",\"\"\"\n");
final List<CsvRow> rows = data.getRows();
// The '#' line is not expected to appear as a row: row 0 is the data line
final CsvRow row0 = rows.get(0);
// Quoted field containing the separator: quotes removed, comma kept
Assertions.assertEquals("sss,sss", row0.get(0));
Assertions.assertEquals("姓名", row0.get(1));
// Fully quoted field: wrapping quotes removed
Assertions.assertEquals("性别", row0.get(2));
// Quotes in the middle of an unquoted field are kept literally
Assertions.assertEquals("关注\"对象\"", row0.get(3));
Assertions.assertEquals("年龄", row0.get(4));
// Empty quoted field
Assertions.assertEquals("", row0.get(5));
// Trailing three quotes plus line break: only a single quote is expected with trimming on
Assertions.assertEquals("\"", row0.get(6));
}
@Test
public void readCsvStr2(){
public void readCsvStrUseConsumerTest(){
CsvUtil.getReader().readFromStr("# 这是一行注释,读取时应忽略\n" +
"\"sss,sss\",姓名,\"性别\",关注\"对象\",年龄,\"\",\"\"\"\n",(csvRow)-> {
// 只有一行,所以直接判断
@@ -100,7 +119,8 @@ public class CsvUtilTest {
Assertions.assertEquals("关注\"对象\"", csvRow.get(3));
Assertions.assertEquals("年龄", csvRow.get(4));
Assertions.assertEquals("", csvRow.get(5));
Assertions.assertEquals("\"", csvRow.get(6));
// 由于"""未闭合包装,因此末尾的换行符被当作包装内的内容,相当于:"""\n",转义后就是"\n
Assertions.assertEquals("\"\n", csvRow.get(6));
});
}