Skip to content

Commit 4ac27db

Browse files
committed
Remove duplication in the value label classes and return the labels from _prepare_non_cat_value_labels
1 parent 85374fd commit 4ac27db

File tree

1 file changed

+24
-22
lines changed

1 file changed

+24
-22
lines changed

pandas/io/stata.py

Lines changed: 24 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -668,17 +668,20 @@ def __init__(self, catarray: Series, encoding: str = "latin-1"):
668668
)
669669
self.value_labels.sort(key=lambda x: x[0])
670670

671+
self._prepare_value_labels()
672+
673+
def _prepare_value_labels(self):
674+
""" Encode value labels. """
675+
671676
self.text_len = 0
672677
self.txt: list[bytes] = []
673678
self.n = 0
679+
# Offsets (length of categories), converted to int32
674680
self.off = np.array([])
681+
# Values, converted to int32
675682
self.val = np.array([])
676683
self.len = 0
677684

678-
self._prepare_value_labels()
679-
680-
def _prepare_value_labels(self):
681-
""" Encode value labels. """
682685
# Compute lengths and setup lists of offsets and labels
683686
offsets: list[int] = []
684687
values: list[int | float] = []
@@ -792,14 +795,6 @@ def __init__(
792795
self.value_labels: list[tuple[int | float, str]] = sorted(
793796
value_labels.items(), key=lambda x: x[0]
794797
)
795-
796-
self.text_len = 0
797-
self.txt: list[bytes] = []
798-
self.n = 0
799-
self.off = np.array([])
800-
self.val = np.array([])
801-
self.len = 0
802-
803798
self._prepare_value_labels()
804799

805800

@@ -2296,6 +2291,7 @@ def __init__(
22962291
self._variable_labels = variable_labels
22972292
self._non_cat_value_labels = value_labels
22982293
self._value_labels: list[StataValueLabel] = []
2294+
self._has_value_labels = np.array([], dtype=bool)
22992295
self._compression = compression
23002296
self._output_file: Buffer | None = None
23012297
self._converted_names: dict[Hashable, str] = {}
@@ -2323,15 +2319,16 @@ def _write_bytes(self, value: bytes) -> None:
23232319
"""
23242320
self.handles.handle.write(value) # type: ignore[arg-type]
23252321

2326-
def _prepare_non_cat_value_labels(self, data: DataFrame) -> None:
2322+
def _prepare_non_cat_value_labels(
2323+
self, data: DataFrame
2324+
) -> list[StataNonCatValueLabel]:
23272325
"""
23282326
Check for value labels provided for non-categorical columns. Value
23292327
labels
23302328
"""
2331-
self._has_value_labels = np.repeat(False, data.shape[1])
2332-
labelled_columns = []
2329+
non_cat_value_labels: list[StataNonCatValueLabel] = []
23332330
if self._non_cat_value_labels is None:
2334-
return
2331+
return non_cat_value_labels
23352332

23362333
for labname, labels in self._non_cat_value_labels.items():
23372334
if labname in self._converted_names:
@@ -2352,11 +2349,8 @@ def _prepare_non_cat_value_labels(self, data: DataFrame) -> None:
23522349
"can only be applied to numeric columns."
23532350
)
23542351
svl = StataNonCatValueLabel(colname, labels)
2355-
self._value_labels.append(svl)
2356-
labelled_columns.append(colname)
2357-
2358-
has_non_cat_val_labels = data.columns.isin(labelled_columns)
2359-
self._has_value_labels |= has_non_cat_val_labels
2352+
non_cat_value_labels.append(svl)
2353+
return non_cat_value_labels
23602354

23612355
def _prepare_categoricals(self, data: DataFrame) -> DataFrame:
23622356
"""
@@ -2548,8 +2542,16 @@ def _prepare_pandas(self, data: DataFrame) -> None:
25482542
# Replace NaNs with Stata missing values
25492543
data = self._replace_nans(data)
25502544

2545+
# Set all columns to initially unlabelled
2546+
self._has_value_labels = np.repeat(False, data.shape[1])
2547+
25512548
# Create value labels for non-categorical data
2552-
self._prepare_non_cat_value_labels(data)
2549+
non_cat_value_labels = self._prepare_non_cat_value_labels(data)
2550+
2551+
non_cat_columns = [svl.labname for svl in non_cat_value_labels]
2552+
has_non_cat_val_labels = data.columns.isin(non_cat_columns)
2553+
self._has_value_labels |= has_non_cat_val_labels
2554+
self._value_labels.extend(non_cat_value_labels)
25532555

25542556
# Convert categoricals to int data, and strip labels
25552557
data = self._prepare_categoricals(data)

0 commit comments

Comments
 (0)