Skip to content

Commit b7c8741

Browse files
committed
rename labels -> codes in core/groupby/
1 parent 08087d6 commit b7c8741

File tree

6 files changed

+61
-63
lines changed

6 files changed

+61
-63
lines changed

pandas/core/groupby/generic.py

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -655,16 +655,16 @@ def value_counts(
655655
rep = partial(np.repeat, repeats=np.add.reduceat(inc, idx))
656656

657657
# multi-index components
658-
labels = list(map(rep, self.grouper.recons_labels)) + [llab(lab, inc)]
658+
codes = list(map(rep, self.grouper.recons_codes)) + [llab(lab, inc)]
659659
levels = [ping.group_index for ping in self.grouper.groupings] + [lev]
660660
names = self.grouper.names + [self._selection_name]
661661

662662
if dropna:
663-
mask = labels[-1] != -1
663+
mask = codes[-1] != -1
664664
if mask.all():
665665
dropna = False
666666
else:
667-
out, labels = out[mask], [label[mask] for label in labels]
667+
out, codes = out[mask], [level_codes[mask] for level_codes in codes]
668668

669669
if normalize:
670670
out = out.astype("float")
@@ -680,11 +680,11 @@ def value_counts(
680680
if sort and bins is None:
681681
cat = ids[inc][mask] if dropna else ids[inc]
682682
sorter = np.lexsort((out if ascending else -out, cat))
683-
out, labels[-1] = out[sorter], labels[-1][sorter]
683+
out, codes[-1] = out[sorter], codes[-1][sorter]
684684

685685
if bins is None:
686686
mi = MultiIndex(
687-
levels=levels, codes=labels, names=names, verify_integrity=False
687+
levels=levels, codes=codes, names=names, verify_integrity=False
688688
)
689689

690690
if is_integer_dtype(out):
@@ -694,14 +694,14 @@ def value_counts(
694694
# for compat. with libgroupby.value_counts need to ensure every
695695
# bin is present at every index level, null filled with zeros
696696
diff = np.zeros(len(out), dtype="bool")
697-
for lab in labels[:-1]:
698-
diff |= np.r_[True, lab[1:] != lab[:-1]]
697+
for codes_ in codes[:-1]:
698+
diff |= np.r_[True, codes_[1:] != codes_[:-1]]
699699

700700
ncat, nbin = diff.sum(), len(levels[-1])
701701

702702
left = [np.repeat(np.arange(ncat), nbin), np.tile(np.arange(nbin), ncat)]
703703

704-
right = [diff.cumsum() - 1, labels[-1]]
704+
right = [diff.cumsum() - 1, codes[-1]]
705705

706706
_, idx = _get_join_indexers(left, right, sort=False, how="left")
707707
out = np.where(idx != -1, out[idx], 0)
@@ -711,7 +711,7 @@ def value_counts(
711711
out, left[-1] = out[sorter], left[-1][sorter]
712712

713713
# build the multi-index w/ full levels
714-
codes = list(map(lambda lab: np.repeat(lab[diff], nbin), labels[:-1]))
714+
codes = list(map(lambda codes: np.repeat(codes[diff], nbin), codes[:-1]))
715715
codes.append(left[-1])
716716

717717
mi = MultiIndex(levels=levels, codes=codes, names=names, verify_integrity=False)
@@ -758,7 +758,7 @@ def pct_change(self, periods=1, fill_method="pad", limit=None, freq=None):
758758
)
759759
)
760760
filled = getattr(self, fill_method)(limit=limit)
761-
fill_grp = filled.groupby(self.grouper.labels)
761+
fill_grp = filled.groupby(self.grouper.codes)
762762
shifted = fill_grp.shift(periods=periods, freq=freq)
763763

764764
return (filled / shifted) - 1

pandas/core/groupby/groupby.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2349,7 +2349,7 @@ def pct_change(self, periods=1, fill_method="pad", limit=None, freq=None, axis=0
23492349
)
23502350
)
23512351
filled = getattr(self, fill_method)(limit=limit)
2352-
fill_grp = filled.groupby(self.grouper.labels)
2352+
fill_grp = filled.groupby(self.grouper.codes)
23532353
shifted = fill_grp.shift(periods=periods, freq=freq)
23542354
return (filled / shifted) - 1
23552355

pandas/core/groupby/grouper.py

Lines changed: 18 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ class Grouper:
5959
<http://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`_.
6060
axis : number/name of the axis, defaults to 0
6161
sort : bool, default to False
62-
whether to sort the resulting labels
62+
whether to sort the resulting codes
6363
closed : {'left' or 'right'}
6464
Closed end of interval. Only when `freq` parameter is passed.
6565
label : {'left' or 'right'}
@@ -231,7 +231,7 @@ class Grouping:
231231
obj :
232232
name :
233233
level :
234-
observed : boolean, default False
234+
observed : bool, default False
235235
If we are a Categorical, use the observed values
236236
in_axis : if the Grouping is a column in self.obj and hence among
237237
Groupby.exclusions list
@@ -240,9 +240,7 @@ class Grouping:
240240
-------
241241
**Attributes**:
242242
* indices : dict of {group -> index_list}
243-
* labels : ndarray, group labels
244-
* ids : mapping of label -> group
245-
* counts : array of group counts
243+
* codes : ndarray, group codes
246244
* group_index : unique groups
247245
* groups : dict of {group -> label_list}
248246
"""
@@ -290,12 +288,12 @@ def __init__(
290288
if self.name is None:
291289
self.name = index.names[level]
292290

293-
self.grouper, self._labels, self._group_index = index._get_grouper_for_level( # noqa: E501
291+
self.grouper, self._codes, self._group_index = index._get_grouper_for_level( # noqa: E501
294292
self.grouper, level
295293
)
296294

297295
# a passed Grouper like, directly get the grouper in the same way
298-
# as single grouper groupby, use the group_info to get labels
296+
# as single grouper groupby, use the group_info to get codes
299297
elif isinstance(self.grouper, Grouper):
300298
# get the new grouper; we already have disambiguated
301299
# what key/level refer to exactly, don't need to
@@ -324,7 +322,7 @@ def __init__(
324322

325323
# we make a CategoricalIndex out of the cat grouper
326324
# preserving the categories / ordered attributes
327-
self._labels = self.grouper.codes
325+
self._codes = self.grouper.codes
328326
if observed:
329327
codes = algorithms.unique1d(self.grouper.codes)
330328
codes = codes[codes != -1]
@@ -380,7 +378,7 @@ def __repr__(self):
380378
def __iter__(self):
381379
return iter(self.indices)
382380

383-
_labels = None
381+
_codes = None
384382
_group_index = None
385383

386384
@property
@@ -397,10 +395,10 @@ def indices(self):
397395
return values._reverse_indexer()
398396

399397
@property
400-
def labels(self):
401-
if self._labels is None:
402-
self._make_labels()
403-
return self._labels
398+
def codes(self):
399+
if self._codes is None:
400+
self._make_codes()
401+
return self._codes
404402

405403
@cache_readonly
406404
def result_index(self):
@@ -411,24 +409,24 @@ def result_index(self):
411409
@property
412410
def group_index(self):
413411
if self._group_index is None:
414-
self._make_labels()
412+
self._make_codes()
415413
return self._group_index
416414

417-
def _make_labels(self):
418-
if self._labels is None or self._group_index is None:
415+
def _make_codes(self):
416+
if self._codes is None or self._group_index is None:
419417
# we have a list of groupers
420418
if isinstance(self.grouper, BaseGrouper):
421-
labels = self.grouper.label_info
419+
codes = self.grouper.codes_info
422420
uniques = self.grouper.result_index
423421
else:
424-
labels, uniques = algorithms.factorize(self.grouper, sort=self.sort)
422+
codes, uniques = algorithms.factorize(self.grouper, sort=self.sort)
425423
uniques = Index(uniques, name=self.name)
426-
self._labels = labels
424+
self._codes = codes
427425
self._group_index = uniques
428426

429427
@cache_readonly
430428
def groups(self):
431-
return self.index.groupby(Categorical.from_codes(self.labels, self.group_index))
429+
return self.index.groupby(Categorical.from_codes(self.codes, self.group_index))
432430

433431

434432
def _get_grouper(

pandas/core/groupby/ops.py

Lines changed: 28 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,7 @@ def _get_group_keys(self):
139139
comp_ids, _, ngroups = self.group_info
140140

141141
# provide "flattened" iterator for multi-group setting
142-
return get_flattened_iterator(comp_ids, ngroups, self.levels, self.labels)
142+
return get_flattened_iterator(comp_ids, ngroups, self.levels, self.codes)
143143

144144
def apply(self, f, data, axis: int = 0):
145145
mutated = self.mutated
@@ -210,13 +210,13 @@ def indices(self):
210210
if len(self.groupings) == 1:
211211
return self.groupings[0].indices
212212
else:
213-
label_list = [ping.labels for ping in self.groupings]
213+
codes_list = [ping.codes for ping in self.groupings]
214214
keys = [com.values_from_object(ping.group_index) for ping in self.groupings]
215-
return get_indexer_dict(label_list, keys)
215+
return get_indexer_dict(codes_list, keys)
216216

217217
@property
218-
def labels(self):
219-
return [ping.labels for ping in self.groupings]
218+
def codes(self):
219+
return [ping.codes for ping in self.groupings]
220220

221221
@property
222222
def levels(self):
@@ -256,46 +256,46 @@ def is_monotonic(self) -> bool:
256256

257257
@cache_readonly
258258
def group_info(self):
259-
comp_ids, obs_group_ids = self._get_compressed_labels()
259+
comp_ids, obs_group_ids = self._get_compressed_codes()
260260

261261
ngroups = len(obs_group_ids)
262262
comp_ids = ensure_int64(comp_ids)
263263
return comp_ids, obs_group_ids, ngroups
264264

265265
@cache_readonly
266-
def label_info(self):
267-
# return the labels of items in original grouped axis
268-
labels, _, _ = self.group_info
266+
def codes_info(self):
267+
# return the codes of items in original grouped axis
268+
codes, _, _ = self.group_info
269269
if self.indexer is not None:
270-
sorter = np.lexsort((labels, self.indexer))
271-
labels = labels[sorter]
272-
return labels
273-
274-
def _get_compressed_labels(self):
275-
all_labels = [ping.labels for ping in self.groupings]
276-
if len(all_labels) > 1:
277-
group_index = get_group_index(all_labels, self.shape, sort=True, xnull=True)
270+
sorter = np.lexsort((codes, self.indexer))
271+
codes = codes[sorter]
272+
return codes
273+
274+
def _get_compressed_codes(self):
275+
all_codes = [ping.codes for ping in self.groupings]
276+
if len(all_codes) > 1:
277+
group_index = get_group_index(all_codes, self.shape, sort=True, xnull=True)
278278
return compress_group_index(group_index, sort=self.sort)
279279

280280
ping = self.groupings[0]
281-
return ping.labels, np.arange(len(ping.group_index))
281+
return ping.codes, np.arange(len(ping.group_index))
282282

283283
@cache_readonly
284284
def ngroups(self) -> int:
285285
return len(self.result_index)
286286

287287
@property
288-
def recons_labels(self):
288+
def recons_codes(self):
289289
comp_ids, obs_ids, _ = self.group_info
290-
labels = (ping.labels for ping in self.groupings)
291-
return decons_obs_group_ids(comp_ids, obs_ids, self.shape, labels, xnull=True)
290+
codes = (ping.codes for ping in self.groupings)
291+
return decons_obs_group_ids(comp_ids, obs_ids, self.shape, codes, xnull=True)
292292

293293
@cache_readonly
294294
def result_index(self):
295295
if not self.compressed and len(self.groupings) == 1:
296296
return self.groupings[0].result_index.rename(self.names[0])
297297

298-
codes = self.recons_labels
298+
codes = self.recons_codes
299299
levels = [ping.result_index for ping in self.groupings]
300300
result = MultiIndex(
301301
levels=levels, codes=codes, verify_integrity=False, names=self.names
@@ -307,9 +307,9 @@ def get_group_levels(self):
307307
return [self.groupings[0].result_index]
308308

309309
name_list = []
310-
for ping, labels in zip(self.groupings, self.recons_labels):
311-
labels = ensure_platform_int(labels)
312-
levels = ping.result_index.take(labels)
310+
for ping, codes in zip(self.groupings, self.recons_codes):
311+
codes = ensure_platform_int(codes)
312+
levels = ping.result_index.take(codes)
313313

314314
name_list.append(levels)
315315

@@ -490,15 +490,15 @@ def _cython_operation(
490490
else:
491491
out_dtype = "object"
492492

493-
labels, _, _ = self.group_info
493+
codes, _, _ = self.group_info
494494

495495
if kind == "aggregate":
496496
result = _maybe_fill(
497497
np.empty(out_shape, dtype=out_dtype), fill_value=np.nan
498498
)
499499
counts = np.zeros(self.ngroups, dtype=np.int64)
500500
result = self._aggregate(
501-
result, counts, values, labels, func, is_datetimelike, min_count
501+
result, counts, values, codes, func, is_datetimelike, min_count
502502
)
503503
elif kind == "transform":
504504
result = _maybe_fill(
@@ -507,7 +507,7 @@ def _cython_operation(
507507

508508
# TODO: min_count
509509
result = self._transform(
510-
result, values, labels, func, is_datetimelike, **kwargs
510+
result, values, codes, func, is_datetimelike, **kwargs
511511
)
512512

513513
if is_integer_dtype(result) and not is_datetimelike:

pandas/tests/groupby/test_grouping.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -559,12 +559,12 @@ def test_level_preserve_order(self, sort, labels, mframe):
559559
# GH 17537
560560
grouped = mframe.groupby(level=0, sort=sort)
561561
exp_labels = np.array(labels, np.intp)
562-
tm.assert_almost_equal(grouped.grouper.labels[0], exp_labels)
562+
tm.assert_almost_equal(grouped.grouper.codes[0], exp_labels)
563563

564564
def test_grouping_labels(self, mframe):
565565
grouped = mframe.groupby(mframe.index.get_level_values(0))
566566
exp_labels = np.array([2, 2, 2, 0, 0, 1, 1, 3, 3, 3], dtype=np.intp)
567-
tm.assert_almost_equal(grouped.grouper.labels[0], exp_labels)
567+
tm.assert_almost_equal(grouped.grouper.codes[0], exp_labels)
568568

569569
def test_list_grouper_with_nat(self):
570570
# GH 14715

pandas/util/testing.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -621,8 +621,8 @@ def _check_types(l, r, obj="Index"):
621621
def _get_ilevel_values(index, level):
622622
# accept level number only
623623
unique = index.levels[level]
624-
labels = index.codes[level]
625-
filled = take_1d(unique.values, labels, fill_value=unique._na_value)
624+
level_codes = index.codes[level]
625+
filled = take_1d(unique.values, level_codes, fill_value=unique._na_value)
626626
values = unique._shallow_copy(filled, name=index.names[level])
627627
return values
628628

0 commit comments

Comments
 (0)