@@ -1995,52 +1995,134 @@ def plot_series(data, kind='line', ax=None, # Series unique
1995
1995
1996
1996
1997
1997
_shared_docs ['boxplot' ] = """
1998
- Make a box plot from DataFrame column optionally grouped by some columns or
1999
- other inputs
1998
+ Make a box-and-whisker plot from DataFrame column optionally grouped
1999
+ by some columns or other inputs. The box extends from the Q1 to Q3
2000
+ quartile values of the data, with a line at the median (Q2).
2001
+ The whiskers extend from the edges of box to show the range of the data.
2002
+ Flier points (outliers) are those past the end of the whiskers.
2003
+ The position of the whiskers is set by default to 1.5 IQR (``whis=1.5``)
2004
+ from the edge of the box.
2005
+
2006
+ For further details see
2007
+ Wikipedia's entry for `boxplot <https://en.wikipedia.org/wiki/Box_plot>`_.
2000
2008
2001
2009
Parameters
2002
2010
----------
2003
- data : the pandas object holding the data
2004
2011
column : column name or list of names, or vector
2005
- Can be any valid input to groupby
2012
+ Can be any valid input to groupby.
2006
2013
by : string or sequence
2007
- Column in the DataFrame to group by
2008
- ax : Matplotlib axes object, optional
2014
+ Column in the DataFrame to group by.
2015
+ ax : Matplotlib axes object (default `None`)
2016
+ The matplotlib axes to be used by boxplot.
2009
2017
fontsize : int or string
2018
+ The font-size used by matplotlib.
2010
2019
rot : label rotation angle
2020
+ The rotation angle of labels.
2021
+ grid : boolean (default `True`)
2022
+ Setting this to True will show the grid.
2011
2023
figsize : A tuple (width, height) in inches
2012
- grid : Setting this to True will show the grid
2024
+ The size of the figure to create, in inches.
2013
2025
layout : tuple (optional)
2014
- (rows, columns) for the layout of the plot
2026
+ Tuple (rows, columns) used for the layout of the plot.
2015
2027
return_type : {None, 'axes', 'dict', 'both'}, default None
2016
2028
The kind of object to return. The default is ``axes``
2017
2029
'axes' returns the matplotlib axes the boxplot is drawn on;
2018
2030
'dict' returns a dictionary whose values are the matplotlib
2019
2031
Lines of the boxplot;
2020
2032
'both' returns a namedtuple with the axes and dict.
2021
-
2022
2033
When grouping with ``by``, a Series mapping columns to ``return_type``
2023
2034
is returned, unless ``return_type`` is None, in which case a NumPy
2024
2035
array of axes is returned with the same shape as ``layout``.
2025
2036
See the prose documentation for more.
2026
-
2027
- `**kwds` : Keyword Arguments
2037
+ kwds : Keyword Arguments (optional)
2028
2038
All other plotting keyword arguments to be passed to
2029
- matplotlib's boxplot function
2039
+ matplotlib's boxplot function.
2030
2040
2031
2041
Returns
2032
2042
-------
2033
2043
lines : dict
2034
2044
ax : matplotlib Axes
2035
- (ax, lines): namedtuple
2045
+ (ax, lines): namedtuple
2046
+
2047
+ See Also
2048
+ --------
2049
+ matplotlib.pyplot.boxplot: Make a box and whisker plot.
2036
2050
2037
2051
Notes
2038
2052
-----
2039
2053
Use ``return_type='dict'`` when you want to tweak the appearance
2040
2054
of the lines after plotting. In this case a dict containing the Lines
2041
2055
making up the boxes, caps, fliers, medians, and whiskers is returned.
2042
- """
2043
2056
2057
+ Examples
2058
+ --------
2059
+ .. plot::
2060
+ :context: close-figs
2061
+
2062
+ >>> np.random.seed(1234)
2063
+
2064
+ >>> df = pd.DataFrame({
2065
+ ... u'stratifying_var': np.random.uniform(0, 100, 20),
2066
+ ... u'price': np.random.normal(100, 5, 20),
2067
+ ... u'demand': np.random.normal(100, 10, 20)})
2068
+
2069
+ >>> df[u'quartiles'] = pd.qcut(
2070
+ ... df[u'stratifying_var'], 4,
2071
+ ... labels=[u'0-25%%', u'25-50%%', u'50-75%%', u'75-100%%'])
2072
+
2073
+ >>> df
2074
+ stratifying_var price demand quartiles
2075
+ 0 19.151945 106.605791 108.416747 0-25%%
2076
+ 1 62.210877 92.265472 123.909605 50-75%%
2077
+ 2 43.772774 98.986768 100.761996 25-50%%
2078
+ 3 78.535858 96.720153 94.335541 75-100%%
2079
+ 4 77.997581 100.967107 100.361419 50-75%%
2080
+ 5 27.259261 102.767195 79.250224 0-25%%
2081
+ 6 27.646426 106.590758 102.477922 0-25%%
2082
+ 7 80.187218 97.653474 91.028432 75-100%%
2083
+ 8 95.813935 103.377770 98.632052 75-100%%
2084
+ 9 87.593263 90.914864 100.182892 75-100%%
2085
+ 10 35.781727 99.084457 107.554140 0-25%%
2086
+ 11 50.099513 105.294846 102.152686 25-50%%
2087
+ 12 68.346294 98.010799 108.410088 50-75%%
2088
+ 13 71.270203 101.687188 85.541899 50-75%%
2089
+ 14 37.025075 105.237893 85.980267 25-50%%
2090
+ 15 56.119619 105.229691 98.990818 25-50%%
2091
+ 16 50.308317 104.318586 94.517576 25-50%%
2092
+ 17 1.376845 99.389542 98.553805 0-25%%
2093
+ 18 77.282662 100.623565 103.540203 50-75%%
2094
+ 19 88.264119 98.386026 99.644870 75-100%%
2095
+
2096
+ To draw a boxplot of the ``demand`` column grouped by ``quartiles``:
2097
+
2098
+ .. plot::
2099
+ :context: close-figs
2100
+
2101
+ >>> boxplot = df.boxplot(column=u'demand', by=u'quartiles')
2102
+
2103
+ Use ``grid=False`` to hide the grid:
2104
+
2105
+ .. plot::
2106
+ :context: close-figs
2107
+
2108
+ >>> boxplot = df.boxplot(column=u'demand', by=u'quartiles', grid=False)
2109
+
2110
+ Optionally, the layout can be changed by setting ``layout=(rows, cols)``:
2111
+
2112
+ .. plot::
2113
+ :context: close-figs
2114
+
2115
+ >>> boxplot = df.boxplot(column=[u'price',u'demand'],
2116
+ ... by=u'quartiles', layout=(1,2),
2117
+ ... figsize=(8,5))
2118
+
2119
+ .. plot::
2120
+ :context: close-figs
2121
+
2122
+ >>> boxplot = df.boxplot(column=[u'price',u'demand'],
2123
+ ... by=u'quartiles', layout=(2,1),
2124
+ ... figsize=(5,8))
2125
+ """
2044
2126
2045
2127
@Appender (_shared_docs ['boxplot' ] % _shared_doc_kwargs )
2046
2128
def boxplot (data , column = None , by = None , ax = None , fontsize = None ,
0 commit comments