Skip to content

Commit 2ef69a1

Browse files
miss-islington and jake-jake-jake
authored and committed
bpo-30157: Fix csv.Sniffer.sniff() regex pattern. (GH-5601) (GH-5602)
Co-authored-by: Jake Davis <[email protected]>
(cherry picked from commit 2411292)
1 parent 7df8049 commit 2ef69a1

File tree

4 files changed

+14
-1
lines changed

4 files changed

+14
-1
lines changed

Lib/csv.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -217,7 +217,7 @@ def _guess_quote_and_delimiter(self, data, delimiters):
217217
matches = []
218218
for restr in (r'(?P<delim>[^\w\n"\'])(?P<space> ?)(?P<quote>["\']).*?(?P=quote)(?P=delim)', # ,".*?",
219219
r'(?:^|\n)(?P<quote>["\']).*?(?P=quote)(?P<delim>[^\w\n"\'])(?P<space> ?)', # ".*?",
220-
r'(?P<delim>>[^\w\n"\'])(?P<space> ?)(?P<quote>["\']).*?(?P=quote)(?:$|\n)', # ,".*?"
220+
r'(?P<delim>[^\w\n"\'])(?P<space> ?)(?P<quote>["\']).*?(?P=quote)(?:$|\n)', # ,".*?"
221221
r'(?:^|\n)(?P<quote>["\']).*?(?P=quote)(?:$|\n)'): # ".*?" (no delim, no space)
222222
regexp = re.compile(restr, re.DOTALL | re.MULTILINE)
223223
matches = regexp.findall(data)

Lib/test/test_csv.py

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -986,6 +986,16 @@ def test_has_header_regex_special_delimiter(self):
986986
self.assertEqual(sniffer.has_header(self.header2 + self.sample8),
987987
True)
988988

989+
def test_guess_quote_and_delimiter(self):
990+
sniffer = csv.Sniffer()
991+
for header in (";'123;4';", "'123;4';", ";'123;4'", "'123;4'"):
992+
with self.subTest(header):
993+
dialect = sniffer.sniff(header, ",;")
994+
self.assertEqual(dialect.delimiter, ';')
995+
self.assertEqual(dialect.quotechar, "'")
996+
self.assertIs(dialect.doublequote, False)
997+
self.assertIs(dialect.skipinitialspace, False)
998+
989999
def test_sniff(self):
9901000
sniffer = csv.Sniffer()
9911001
dialect = sniffer.sniff(self.sample1)

Misc/ACKS

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -355,6 +355,7 @@ Kushal Das
355355
Jonathan Dasteel
356356
Pierre-Yves David
357357
A. Jesse Jiryu Davis
358+
Jake Davis
358359
Ratnadeep Debnath
359360
Merlijn van Deen
360361
John DeGood
Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,2 @@
1+
Fixed guessing quote and delimiter in csv.Sniffer.sniff() when only the last
2+
field is quoted. Patch by Jake Davis.

0 commit comments

Comments
 (0)