Skip to content

Commit 6755f08

Browse files
committed
read_excel - added comment as named argument comment and test_comment_* tests
1 parent 993b816 commit 6755f08

File tree

2 files changed

+65
-0
lines changed

2 files changed

+65
-0
lines changed

pandas/io/excel.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,9 @@
154154
convert integral floats to int (i.e., 1.0 --> 1). If False, all numeric
155155
data will be read in as floats: Excel stores all numbers as floats
156156
internally
157+
comment : str, default None
158+
Comments out the remainder of the line. Character or characters that indicate comments
159+
in the input file. Any data between the comment string and the end of the line is ignored.
157160
158161
Returns
159162
-------
@@ -294,6 +297,7 @@ def read_excel(io,
294297
thousands=None,
295298
skip_footer=0,
296299
convert_float=True,
300+
comment=None,
297301
**kwds):
298302

299303
# Can't use _deprecate_kwarg since sheetname=None has a special meaning
@@ -327,6 +331,7 @@ def read_excel(io,
327331
thousands=thousands,
328332
skip_footer=skip_footer,
329333
convert_float=convert_float,
334+
comment=comment,
330335
**kwds)
331336

332337

@@ -409,6 +414,7 @@ def parse(self,
409414
thousands=None,
410415
skip_footer=0,
411416
convert_float=True,
417+
comment=None,
412418
**kwds):
413419
"""
414420
Parse specified sheet(s) into a DataFrame
@@ -434,6 +440,7 @@ def parse(self,
434440
thousands=thousands,
435441
skip_footer=skip_footer,
436442
convert_float=convert_float,
443+
comment=comment,
437444
**kwds)
438445

439446
def _should_parse(self, i, usecols):
@@ -488,6 +495,7 @@ def _parse_excel(self,
488495
thousands=None,
489496
skip_footer=0,
490497
convert_float=True,
498+
comment=None,
491499
**kwds):
492500

493501
skipfooter = kwds.pop('skipfooter', None)
@@ -665,6 +673,7 @@ def _parse_cell(cell_contents, cell_typ):
665673
date_parser=date_parser,
666674
thousands=thousands,
667675
skipfooter=skip_footer,
676+
comment=comment,
668677
**kwds)
669678

670679
output[asheetname] = parser.read(nrows=nrows)

pandas/tests/io/test_excel.py

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1862,6 +1862,62 @@ def test_invalid_columns(self):
18621862
with pytest.raises(KeyError):
18631863
write_frame.to_excel(path, 'test1', columns=['C', 'D'])
18641864

1865+
def test_comment_arg(self):
    # Test that read_excel accepts the ``comment`` argument and that using
    # it leaves the layout of the result (type, shape, column labels)
    # identical to a plain read of the same sheet.
    with ensure_clean(self.ext) as path:

        # Create file to read in; two cells start with the comment marker
        write_frame = DataFrame({'A': ['one', '#one', 'one'],
                                 'B': ['two', 'two', '#two']})
        write_frame.to_excel(path, 'test_c')

        # Read the same sheet without and with the comment argument
        read_frame = read_excel(path, 'test_c')
        read_frame_commented = read_excel(path, 'test_c', comment='#')

        # The class check alone only compares the types of the two
        # results, so also pin the shape and the column labels to make
        # the test fail if ``comment`` drops or adds rows/columns.
        tm.assert_class_equal(read_frame, read_frame_commented)
        assert read_frame_commented.shape == read_frame.shape
        assert (list(read_frame_commented.columns) ==
                list(read_frame.columns))
1878+
1879+
def test_comment_default(self):
    # Omitting ``comment`` must give the same frame as passing
    # ``comment=None`` explicitly.
    with ensure_clean(self.ext) as path:

        # Write a sheet whose cells contain '#' so that any accidental
        # comment handling would show up as a difference
        source = DataFrame({'A': ['one', '#one', 'one'],
                            'B': ['two', 'two', '#two']})
        source.to_excel(path, 'test_c')

        # First read relies on the default, second spells it out
        implicit_default = read_excel(path, 'test_c')
        explicit_none = read_excel(path, 'test_c', comment=None)

        tm.assert_frame_equal(implicit_default, explicit_none)
1892+
1893+
def test_comment_used(self):
    # With ``comment='#'`` every cell from a '#'-prefixed value to the end
    # of its row must come back empty.
    with ensure_clean(self.ext) as path:

        # Write the sheet that will be read back
        source = DataFrame({'A': ['one', '#one', 'one'],
                            'B': ['two', 'two', '#two']})
        source.to_excel(path, 'test_c')

        result = read_excel(path, 'test_c', comment='#')

        # Build the expected frame by hand: start from a plain read and
        # blank the cells that sit at or after a comment marker
        expected = read_excel(path, 'test_c')
        for row, col in ((1, 0), (1, 1), (2, 1)):
            expected.iloc[row, col] = None

        tm.assert_frame_equal(result, expected)
1909+
1910+
def test_comment_emptyline(self):
    # A trailing row that is entirely commented out must not appear in
    # the frame returned by read_excel.
    with ensure_clean(self.ext) as path:

        # Second (last) row starts with the comment marker
        source = DataFrame({'a': ['1', '#2'], 'b': ['2', '3']})
        source.to_excel(path, index=False)

        # Only the first data row should survive the read
        result = read_excel(path, comment='#')
        surviving_rows = source.iloc[0:1, :]
        assert result.shape == surviving_rows.shape
1920+
18651921
def test_datetimes(self):
18661922

18671923
# Test writing and reading datetimes. For issue #9139. (xref #9185)

0 commit comments

Comments
 (0)