修复CsvParser中对正文中双引号处理逻辑问题

This commit is contained in:
Looly
2024-07-31 01:14:39 +08:00
parent 967f3d2ca0
commit 1a0ae09047
3 changed files with 60 additions and 19 deletions

View File

@@ -2645,7 +2645,7 @@ public class CharSequenceUtil extends StrValidator {
if (isEmpty(str)) { if (isEmpty(str)) {
return toStringOrNull(str); return toStringOrNull(str);
} }
if (str.charAt(0) == prefix && str.charAt(str.length() - 1) == suffix) { if (isWrap(str, prefix, suffix)) {
return sub(str, 1, str.length() - 1); return sub(str, 1, str.length() - 1);
} }
return str.toString(); return str.toString();

View File

@@ -302,8 +302,8 @@ public final class CsvParser extends ComputeIter<CsvRow> implements Closeable, S
buf.mark(); buf.mark();
addField(currentFields, currentField.toString()); addField(currentFields, currentField.toString());
currentField.setLength(0); currentField.setLength(0);
} else if (c == config.textDelimiter) { } else if (c == config.textDelimiter && isFieldBegin(preChar)) {
// 引号开始 // 引号开始且出现在字段开头
inQuotes = true; inQuotes = true;
copyLen++; copyLen++;
} else if (c == CharUtil.CR) { } else if (c == CharUtil.CR) {
@@ -363,8 +363,12 @@ public final class CsvParser extends ComputeIter<CsvRow> implements Closeable, S
// 忽略多余引号后的换行符 // 忽略多余引号后的换行符
field = StrUtil.trim(field, StrTrimer.TrimMode.SUFFIX, (c -> c == CharUtil.LF || c == CharUtil.CR)); field = StrUtil.trim(field, StrTrimer.TrimMode.SUFFIX, (c -> c == CharUtil.LF || c == CharUtil.CR));
field = StrUtil.unWrap(field, textDelimiter); if(StrUtil.isWrap(field, textDelimiter)){
field = StrUtil.sub(field, 1, field.length() - 1);
// https://datatracker.ietf.org/doc/html/rfc4180#section-2
// 第七条规则,只有包装内的包装符需要转义
field = StrUtil.replace(field, String.valueOf(textDelimiter) + textDelimiter, String.valueOf(textDelimiter)); field = StrUtil.replace(field, String.valueOf(textDelimiter) + textDelimiter, String.valueOf(textDelimiter));
}
if (this.config.trimField) { if (this.config.trimField) {
// issue#I49M0C@Gitee // issue#I49M0C@Gitee
field = StrUtil.trim(field); field = StrUtil.trim(field);
@@ -384,6 +388,24 @@ public final class CsvParser extends ComputeIter<CsvRow> implements Closeable, S
return (c == CharUtil.CR || c == CharUtil.LF) && preChar != CharUtil.CR; return (c == CharUtil.CR || c == CharUtil.LF) && preChar != CharUtil.CR;
} }
/**
 * Decides, from the previously read character, whether the current position
 * is the beginning of a field. A field begins when:
 * <ul>
 * <li>there is no previous character (start of the content), signalled by {@code -1}</li>
 * <li>the previous character is the field separator, i.e. the prior field just ended</li>
 * <li>the previous character is a line break (CR or LF), i.e. a new line starts</li>
 * </ul>
 *
 * @param preChar the previous character, or {@code -1} when there is none
 * @return {@code true} if the current position is the beginning of a field
 */
private boolean isFieldBegin(final int preChar) {
	// Start of the content: nothing has been read before this position.
	if (preChar == -1) {
		return true;
	}
	// The previous field was just terminated by the separator.
	if (preChar == config.fieldSeparator) {
		return true;
	}
	// Otherwise only a preceding line break marks the start of a field.
	final boolean afterLineFeed = (preChar == CharUtil.LF);
	final boolean afterCarriageReturn = (preChar == CharUtil.CR);
	return afterLineFeed || afterCarriageReturn;
}
/** /**
* 内部Buffer * 内部Buffer
* *

View File

@@ -8,9 +8,13 @@ import static org.junit.jupiter.api.Assertions.assertEquals;
/** /**
* 按照 https://datatracker.ietf.org/doc/html/rfc4180#section-2<br> * 按照 https://datatracker.ietf.org/doc/html/rfc4180#section-2<br>
* 如果字段正文中出现双引号,需要使用两个双引号表示转义 * 如果字段正文中出现双引号,需要使用两个双引号表示转义,并整段使用引号包裹
*/ */
public class Pr1244Test { public class Pr1244Test {
/**
* 此测试中没有引号包裹,则所有引号都被当作内容
*/
@Test @Test
void csvReadTest() { void csvReadTest() {
final String csv = "a,q\"\"e,d,f"; final String csv = "a,q\"\"e,d,f";
@@ -18,6 +22,21 @@ public class Pr1244Test {
final CsvData read = reader.read(); final CsvData read = reader.read();
assertEquals(4, read.getRow(0).size()); assertEquals(4, read.getRow(0).size());
assertEquals("a", read.getRow(0).get(0)); assertEquals("a", read.getRow(0).get(0));
assertEquals("q\"\"e", read.getRow(0).get(1));
assertEquals("d", read.getRow(0).get(2));
assertEquals("f", read.getRow(0).get(3));
}
/**
* 此测试中没有引号包裹,则所有引号都被当作内容
*/
@Test
void csvReadTest2() {
final String csv = "a,q\"e,d,f";
final CsvReader reader = CsvUtil.getReader(new StringReader(csv));
final CsvData read = reader.read();
assertEquals(4, read.getRow(0).size());
assertEquals("a", read.getRow(0).get(0));
assertEquals("q\"e", read.getRow(0).get(1)); assertEquals("q\"e", read.getRow(0).get(1));
assertEquals("d", read.getRow(0).get(2)); assertEquals("d", read.getRow(0).get(2));
assertEquals("f", read.getRow(0).get(3)); assertEquals("f", read.getRow(0).get(3));