DOC: read_excel - added examples and fixed formatting bug

JanLauGe · JanLauGe · commit 993b81696705 · 2017-12-12T23:34:30.000Z
diff --git a/pandas/io/excel.py b/pandas/io/excel.py
@@ -137,7 +137,7 @@
 na_values : scalar, str, list-like, or dict, default None
     Additional strings to recognize as NA/NaN. If dict passed, specific
     per-column NA values. By default the following values are interpreted
-    as NaN: '""" + fill("', '".join(sorted(_NA_VALUES)), 70) + """'.
+    as NaN: '""" + fill("', '".join(sorted(_NA_VALUES)), 999) + """'.
 keep_default_na : bool, default True
     If na_values are specified and keep_default_na is False the default NaN
     values are overridden, otherwise they're appended to.
@@ -160,6 +160,80 @@
 parsed : DataFrame or Dict of DataFrames
     DataFrame from the passed in Excel file.  See notes in sheet_name
     argument for more information on when a Dict of Dataframes is returned.
+
+Examples
+--------
+
+An example DataFrame written to a local file
+
+>>> df_out = pd.DataFrame([('string1', 1),
+...                        ('string2', 2),
+...                        ('string3', 3)],
+...                       columns=('Name', 'Value'))
+>>> df_out
+      Name  Value
+0  string1      1
+1  string2      2
+2  string3      3
+>>> df_out.to_excel('tmp.xlsx')
+
+The file can be read using the file name as string or an open file object:
+
+>>> pd.read_excel('tmp.xlsx')
+      Name  Value
+0  string1      1
+1  string2      2
+2  string3      3
+
+>>> pd.read_excel(open('tmp.xlsx','rb'))
+      Name  Value
+0  string1      1
+1  string2      2
+2  string3      3
+
+Index and header can be specified via the `index_col` and `header` arguments
+
+>>> pd.read_excel(open('tmp.xlsx','rb'), index_col=None, header=None)
+     0        1      2
+0  NaN     Name  Value
+1  0.0  string1      1
+2  1.0  string2      2
+3  2.0  string3      3
+
+Column types are inferred but can be explicitly specified
+
+>>> pd.read_excel(open('tmp.xlsx','rb'), dtype={'Name':str, 'Value':float})
+      Name  Value
+0  string1    1.0
+1  string2    2.0
+2  string3    3.0
+
+True, False, and NA values, and thousands separators have defaults,
+but can be explicitly specified, too. Supply the values you would like
+as strings or lists of strings!
+
+>>> pd.read_excel(open('tmp.xlsx','rb'),
+...               true_values='2',
+...               false_values='3',
+...               na_values=['string1', 'string2'],
+...               thousands=',')
+      Name  Value
+0      NaN      1
+1      NaN      2
+2  string3      3
+
+Comment lines in the excel input file can be skipped using the `comment` kwarg
+
+>>> df = pd.DataFrame({'a': ['1', '#2'], 'b': ['2', '3']})
+>>> df.to_excel('tmp.xlsx', index=False)
+>>> pd.read_excel('tmp.xlsx')
+    a  b
+0   1  2
+1  #2  3
+
+>>> pd.read_excel('tmp.xlsx', comment='#')
+   a  b
+0  1  2
 """