55
55
56
56
57
57
# Note: We need to make sure `frame` is imported before `pivot`, otherwise
58
- # _shared_docs['pivot_table'] will not yet exist. TODO: Fix this dependency
59
- @Substitution ("\n data : DataFrame" )
60
- @Appender (_shared_docs ["pivot_table" ], indents = 1 )
58
+ # _shared_docs['pivot_table'] will not yet exist. TODO: Fix this dependency
61
59
def pivot_table (
62
60
data : DataFrame ,
63
61
values = None ,
@@ -71,6 +69,171 @@ def pivot_table(
71
69
observed : bool = True ,
72
70
sort : bool = True ,
73
71
) -> DataFrame :
72
+ """
73
+ Create a spreadsheet-style pivot table as a DataFrame.
74
+
75
+ The levels in the pivot table will be stored in MultiIndex objects
76
+ (hierarchical indexes) on the index and columns of the result DataFrame.
77
+
78
+ Parameters
79
+ ----------
80
+ data : DataFrame
81
+ Input pandas DataFrame object.
82
+ values : list-like or scalar, optional
83
+ Column or columns to aggregate.
84
+ index : column, Grouper, array, or list of the previous
85
+ Keys to group by on the pivot table index. If a list is passed,
86
+ it can contain any of the other types (except list). If an array is
87
+ passed, it must be the same length as the data and will be used in
88
+ the same manner as column values.
89
+ columns : column, Grouper, array, or list of the previous
90
+ Keys to group by on the pivot table column. If a list is passed,
91
+ it can contain any of the other types (except list). If an array is
92
+ passed, it must be the same length as the data and will be used in
93
+ the same manner as column values.
94
+ aggfunc : function, list of functions, dict, default "mean"
95
+ If a list of functions is passed, the resulting pivot table will have
96
+ hierarchical columns whose top level are the function names
97
+ (inferred from the function objects themselves).
98
+ If a dict is passed, the key is the column to aggregate and the value is a
99
+ function or list of functions. If ``margins=True``, aggfunc will be
100
+ used to calculate the partial aggregates.
101
+ fill_value : scalar, default None
102
+ Value to replace missing values with (in the resulting pivot table,
103
+ after aggregation).
104
+ margins : bool, default False
105
+ If ``margins=True``, special ``All`` columns and rows
106
+ will be added with partial group aggregates across the categories
107
+ on the rows and columns.
108
+ dropna : bool, default True
109
+ Do not include columns whose entries are all NaN. If True,
110
+ rows with a NaN value in any column will be omitted before
111
+ computing margins.
112
+ margins_name : str, default 'All'
113
+ Name of the row / column that will contain the totals
114
+ when margins is True.
115
+ observed : bool, default True
116
+ This only applies if any of the groupers are Categoricals.
117
+ If True: only show observed values for categorical groupers.
118
+ If False: show all values for categorical groupers.
119
+
120
+ .. versionchanged:: 3.0.0
121
+
122
+ The default value is now ``True``.
123
+
124
+ sort : bool, default True
125
+ Specifies if the result should be sorted.
126
+
127
+ .. versionadded:: 1.3.0
128
+
129
+ Returns
130
+ -------
131
+ DataFrame
132
+ An Excel style pivot table.
133
+
134
+ See Also
135
+ --------
136
+ DataFrame.pivot : Pivot without aggregation that can handle
137
+ non-numeric data.
138
+ DataFrame.melt : Unpivot a DataFrame from wide to long format,
139
+ optionally leaving identifiers set.
140
+ wide_to_long : Wide panel to long format. Less flexible but more
141
+ user-friendly than melt.
142
+
143
+ Notes
144
+ -----
145
+ Reference :ref:`the user guide <reshaping.pivot>` for more examples.
146
+
147
+ Examples
148
+ --------
149
+ >>> df = pd.DataFrame(
150
+ ... {
151
+ ... "A": ["foo", "foo", "foo", "foo", "foo", "bar", "bar", "bar", "bar"],
152
+ ... "B": ["one", "one", "one", "two", "two", "one", "one", "two", "two"],
153
+ ... "C": [
154
+ ... "small",
155
+ ... "large",
156
+ ... "large",
157
+ ... "small",
158
+ ... "small",
159
+ ... "large",
160
+ ... "small",
161
+ ... "small",
162
+ ... "large",
163
+ ... ],
164
+ ... "D": [1, 2, 2, 3, 3, 4, 5, 6, 7],
165
+ ... "E": [2, 4, 5, 5, 6, 6, 8, 9, 9],
166
+ ... }
167
+ ... )
168
+ >>> df
169
+ A B C D E
170
+ 0 foo one small 1 2
171
+ 1 foo one large 2 4
172
+ 2 foo one large 2 5
173
+ 3 foo two small 3 5
174
+ 4 foo two small 3 6
175
+ 5 bar one large 4 6
176
+ 6 bar one small 5 8
177
+ 7 bar two small 6 9
178
+ 8 bar two large 7 9
179
+
180
+ This first example aggregates values by taking the sum.
181
+
182
+ >>> table = pd.pivot_table(
183
+ ... df, values="D", index=["A", "B"], columns=["C"], aggfunc="sum"
184
+ ... )
185
+ >>> table
186
+ C large small
187
+ A B
188
+ bar one 4.0 5.0
189
+ two 7.0 6.0
190
+ foo one 4.0 1.0
191
+ two NaN 6.0
192
+
193
+ We can also fill missing values using the `fill_value` parameter.
194
+
195
+ >>> table = pd.pivot_table(
196
+ ... df, values="D", index=["A", "B"], columns=["C"], aggfunc="sum", fill_value=0
197
+ ... )
198
+ >>> table
199
+ C large small
200
+ A B
201
+ bar one 4 5
202
+ two 7 6
203
+ foo one 4 1
204
+ two 0 6
205
+
206
+ The next example aggregates by taking the mean across multiple columns.
207
+
208
+ >>> table = pd.pivot_table(
209
+ ... df, values=["D", "E"], index=["A", "C"], aggfunc={"D": "mean", "E": "mean"}
210
+ ... )
211
+ >>> table
212
+ D E
213
+ A C
214
+ bar large 5.500000 7.500000
215
+ small 5.500000 8.500000
216
+ foo large 2.000000 4.500000
217
+ small 2.333333 4.333333
218
+
219
+ We can also calculate multiple types of aggregations for any given
220
+ value column.
221
+
222
+ >>> table = pd.pivot_table(
223
+ ... df,
224
+ ... values=["D", "E"],
225
+ ... index=["A", "C"],
226
+ ... aggfunc={"D": "mean", "E": ["min", "max", "mean"]},
227
+ ... )
228
+ >>> table
229
+ D E
230
+ mean max mean min
231
+ A C
232
+ bar large 5.500000 9 7.500000 6
233
+ small 5.500000 9 8.500000 8
234
+ foo large 2.000000 5 4.500000 4
235
+ small 2.333333 6 4.333333 2
236
+ """
74
237
index = _convert_by (index )
75
238
columns = _convert_by (columns )
76
239
0 commit comments