Skip to content

Commit

Permalink
CSV: fix parsing files with double-quote inside a field value
Browse files Browse the repository at this point in the history
  • Loading branch information
rouault committed Jan 15, 2025
1 parent 6f9326a commit dbf913c
Show file tree
Hide file tree
Showing 3 changed files with 65 additions and 19 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
first,second,third
1,two"with quote,3
10,twenty"with quote,30
20 changes: 20 additions & 0 deletions autotest/ogr/ogr_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -2800,6 +2800,26 @@ def test_ogr_csv_double_quotes_in_middle_of_field():
assert f["str"] == "foo"


###############################################################################
# Test bugfix for https://github.com/OSGeo/gdal/issues/11660


def test_ogr_csv_double_quotes_in_middle_of_field_bis():
    """Check fields that contain a double quote in the middle of their value.

    Opens the ``double_quotes_in_middle_of_field_bis.csv`` fixture and reads
    its first two features, asserting that the ``first``, ``second`` and
    ``third`` attributes come back as separate values and that the embedded
    double quote is kept verbatim in ``second``.
    """

    dataset = ogr.Open("data/csv/double_quotes_in_middle_of_field_bis.csv")
    layer = dataset.GetLayer(0)

    # One tuple per feature, in reading order: (first, second, third).
    expected_rows = (
        ("1", """two"with quote""", "3"),
        ("10", """twenty"with quote""", "30"),
    )

    for expected_first, expected_second, expected_third in expected_rows:
        feature = layer.GetNextFeature()
        assert feature["first"] == expected_first
        assert feature["second"] == expected_second
        assert feature["third"] == expected_third


###############################################################################


Expand Down
61 changes: 42 additions & 19 deletions port/cpl_csv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -647,45 +647,68 @@ CSVReadParseLineGeneric(void *fp, const char *(*pfnReadLine)(void *, size_t),
return CSVSplitLine(pszLine, pszDelimiter, bKeepLeadingAndClosingQuotes,
bMergeDelimiter);

const size_t nDelimiterLength = strlen(pszDelimiter);
bool bInString = false; // keep in that scope !
std::string osWorkLine; // keep in that scope !
size_t i = 0; // keep in that scope !

try
{
// We must now count the quotes in our working string, and as
// long as it is odd, keep adding new lines.
std::string osWorkLine(pszLine);

size_t i = 0;
int nCount = 0;

while (true)
{
for (; i < osWorkLine.size(); i++)
for (; pszLine[i]; ++i)
{
if (osWorkLine[i] == '\"')
nCount++;
if (pszLine[i] == '\"')
{
if (!bInString)
{
// Only consider " as the start of a quoted string
// if it is the first character of the line, or
// if it is immediately after the field delimiter.
if (i == 0 ||
(i >= nDelimiterLength &&
memcmp(pszLine + (i - nDelimiterLength),
pszDelimiter, nDelimiterLength) == 0))
{
bInString = true;
}
}
else if (pszLine[i + 1] == '"')
{
// Escaped double quote in a quoted string
++i;
}
else
{
bInString = false;
}
}
}

if (nCount % 2 == 0)
break;
if (!bInString)
{
return CSVSplitLine(pszLine, pszDelimiter,
bKeepLeadingAndClosingQuotes,
bMergeDelimiter);
}

if (osWorkLine.empty())
osWorkLine = pszLine;

pszLine = pfnReadLine(fp, nMaxLineSize);
if (pszLine == nullptr)
break;

osWorkLine.append("\n");
osWorkLine.append(pszLine);
pszLine = osWorkLine.c_str();
}

char **papszReturn =
CSVSplitLine(osWorkLine.c_str(), pszDelimiter,
bKeepLeadingAndClosingQuotes, bMergeDelimiter);

return papszReturn;
}
catch (const std::exception &e)
{
CPLError(CE_Failure, CPLE_OutOfMemory, "%s", e.what());
return nullptr;
}
return nullptr;
}

/************************************************************************/
Expand Down

0 comments on commit dbf913c

Please sign in to comment.