Skip to content

Commit d285833

Browse files
DOC: fix PR07 for pandas.pivot_table
1 parent b0c4194 commit d285833

File tree

2 files changed

+166
-4
lines changed

2 files changed

+166
-4
lines changed

ci/code_checks.sh

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -473,7 +473,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
473473
-i "pandas.option_context SA01" \
474474
-i "pandas.period_range RT03,SA01" \
475475
-i "pandas.pivot PR07" \
476-
-i "pandas.pivot_table PR07" \
477476
-i "pandas.plotting.andrews_curves RT03,SA01" \
478477
-i "pandas.plotting.autocorrelation_plot RT03,SA01" \
479478
-i "pandas.plotting.lag_plot RT03,SA01" \

pandas/core/reshape/pivot.py

Lines changed: 166 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -55,9 +55,7 @@
5555

5656

5757
# Note: We need to make sure `frame` is imported before `pivot`, otherwise
58-
# _shared_docs['pivot_table'] will not yet exist. TODO: Fix this dependency
59-
@Substitution("\ndata : DataFrame")
60-
@Appender(_shared_docs["pivot_table"], indents=1)
58+
# _shared_docs['pivot_table'] will not yet exist. TODO: Fix this dependency
6159
def pivot_table(
6260
data: DataFrame,
6361
values=None,
@@ -71,6 +69,171 @@ def pivot_table(
7169
observed: bool = True,
7270
sort: bool = True,
7371
) -> DataFrame:
72+
"""
73+
Create a spreadsheet-style pivot table as a DataFrame.
74+
75+
The levels in the pivot table will be stored in MultiIndex objects
76+
(hierarchical indexes) on the index and columns of the result DataFrame.
77+
78+
Parameters
79+
----------
80+
data : DataFrame
81+
Input pandas DataFrame object.
82+
values : list-like or scalar, optional
83+
Column or columns to aggregate.
84+
index : column, Grouper, array, or list of the previous
85+
Keys to group by on the pivot table index. If a list is passed,
86+
it can contain any of the other types (except list). If an array is
87+
passed, it must be the same length as the data and will be used in
88+
the same manner as column values.
89+
columns : column, Grouper, array, or list of the previous
90+
Keys to group by on the pivot table column. If a list is passed,
91+
it can contain any of the other types (except list). If an array is
92+
passed, it must be the same length as the data and will be used in
93+
the same manner as column values.
94+
aggfunc : function, list of functions, dict, default "mean"
95+
If a list of functions is passed, the resulting pivot table will have
96+
hierarchical columns whose top level are the function names
97+
(inferred from the function objects themselves).
98+
If a dict is passed, the key is column to aggregate and the value is
99+
function or list of functions. If ``margins=True``, aggfunc will be
100+
used to calculate the partial aggregates.
101+
fill_value : scalar, default None
102+
Value to replace missing values with (in the resulting pivot table,
103+
after aggregation).
104+
margins : bool, default False
105+
If ``margins=True``, special ``All`` columns and rows
106+
will be added with partial group aggregates across the categories
107+
on the rows and columns.
108+
dropna : bool, default True
109+
Do not include columns whose entries are all NaN. If True,
110+
rows with a NaN value in any column will be omitted before
111+
computing margins.
112+
margins_name : str, default 'All'
113+
Name of the row / column that will contain the totals
114+
when margins is True.
115+
observed : bool, default True
116+
This only applies if any of the groupers are Categoricals.
117+
If True: only show observed values for categorical groupers.
118+
If False: show all values for categorical groupers.
119+
120+
.. versionchanged:: 3.0.0
121+
122+
The default value is now ``True``.
123+
124+
sort : bool, default True
125+
Specifies if the result should be sorted.
126+
127+
.. versionadded:: 1.3.0
128+
129+
Returns
130+
-------
131+
DataFrame
132+
An Excel style pivot table.
133+
134+
See Also
135+
--------
136+
DataFrame.pivot : Pivot without aggregation that can handle
137+
non-numeric data.
138+
DataFrame.melt : Unpivot a DataFrame from wide to long format,
139+
optionally leaving identifiers set.
140+
wide_to_long : Wide panel to long format. Less flexible but more
141+
user-friendly than melt.
142+
143+
Notes
144+
-----
145+
Reference :ref:`the user guide <reshaping.pivot>` for more examples.
146+
147+
Examples
148+
--------
149+
>>> df = pd.DataFrame(
150+
... {
151+
... "A": ["foo", "foo", "foo", "foo", "foo", "bar", "bar", "bar", "bar"],
152+
... "B": ["one", "one", "one", "two", "two", "one", "one", "two", "two"],
153+
... "C": [
154+
... "small",
155+
... "large",
156+
... "large",
157+
... "small",
158+
... "small",
159+
... "large",
160+
... "small",
161+
... "small",
162+
... "large",
163+
... ],
164+
... "D": [1, 2, 2, 3, 3, 4, 5, 6, 7],
165+
... "E": [2, 4, 5, 5, 6, 6, 8, 9, 9],
166+
... }
167+
... )
168+
>>> df
169+
A B C D E
170+
0 foo one small 1 2
171+
1 foo one large 2 4
172+
2 foo one large 2 5
173+
3 foo two small 3 5
174+
4 foo two small 3 6
175+
5 bar one large 4 6
176+
6 bar one small 5 8
177+
7 bar two small 6 9
178+
8 bar two large 7 9
179+
180+
This first example aggregates values by taking the sum.
181+
182+
>>> table = pd.pivot_table(
183+
... df, values="D", index=["A", "B"], columns=["C"], aggfunc="sum"
184+
... )
185+
>>> table
186+
C large small
187+
A B
188+
bar one 4.0 5.0
189+
two 7.0 6.0
190+
foo one 4.0 1.0
191+
two NaN 6.0
192+
193+
We can also fill missing values using the ``fill_value`` parameter.
194+
195+
>>> table = pd.pivot_table(
196+
... df, values="D", index=["A", "B"], columns=["C"], aggfunc="sum", fill_value=0
197+
... )
198+
>>> table
199+
C large small
200+
A B
201+
bar one 4 5
202+
two 7 6
203+
foo one 4 1
204+
two 0 6
205+
206+
The next example aggregates by taking the mean across multiple columns.
207+
208+
>>> table = pd.pivot_table(
209+
... df, values=["D", "E"], index=["A", "C"], aggfunc={"D": "mean", "E": "mean"}
210+
... )
211+
>>> table
212+
D E
213+
A C
214+
bar large 5.500000 7.500000
215+
small 5.500000 8.500000
216+
foo large 2.000000 4.500000
217+
small 2.333333 4.333333
218+
219+
We can also calculate multiple types of aggregations for any given
220+
value column.
221+
222+
>>> table = pd.pivot_table(
223+
... df,
224+
... values=["D", "E"],
225+
... index=["A", "C"],
226+
... aggfunc={"D": "mean", "E": ["min", "max", "mean"]},
227+
... )
228+
>>> table
229+
D E
230+
mean max mean min
231+
A C
232+
bar large 5.500000 9 7.500000 6
233+
small 5.500000 9 8.500000 8
234+
foo large 2.000000 5 4.500000 4
235+
small 2.333333 6 4.333333 2
236+
"""
74237
index = _convert_by(index)
75238
columns = _convert_by(columns)
76239

0 commit comments

Comments
 (0)