@@ -668,17 +668,20 @@ def __init__(self, catarray: Series, encoding: str = "latin-1"):
668
668
)
669
669
self .value_labels .sort (key = lambda x : x [0 ])
670
670
671
+ self ._prepare_value_labels ()
672
+
673
+ def _prepare_value_labels (self ):
674
+ """ Encode value labels. """
675
+
671
676
self .text_len = 0
672
677
self .txt : list [bytes ] = []
673
678
self .n = 0
679
+ # Offsets (length of categories), converted to int32
674
680
self .off = np .array ([])
681
+ # Values, converted to int32
675
682
self .val = np .array ([])
676
683
self .len = 0
677
684
678
- self ._prepare_value_labels ()
679
-
680
- def _prepare_value_labels (self ):
681
- """ Encode value labels. """
682
685
# Compute lengths and setup lists of offsets and labels
683
686
offsets : list [int ] = []
684
687
values : list [int | float ] = []
@@ -792,14 +795,6 @@ def __init__(
792
795
self .value_labels : list [tuple [int | float , str ]] = sorted (
793
796
value_labels .items (), key = lambda x : x [0 ]
794
797
)
795
-
796
- self .text_len = 0
797
- self .txt : list [bytes ] = []
798
- self .n = 0
799
- self .off = np .array ([])
800
- self .val = np .array ([])
801
- self .len = 0
802
-
803
798
self ._prepare_value_labels ()
804
799
805
800
@@ -2296,6 +2291,7 @@ def __init__(
2296
2291
self ._variable_labels = variable_labels
2297
2292
self ._non_cat_value_labels = value_labels
2298
2293
self ._value_labels : list [StataValueLabel ] = []
2294
+ self ._has_value_labels = np .array ([], dtype = bool )
2299
2295
self ._compression = compression
2300
2296
self ._output_file : Buffer | None = None
2301
2297
self ._converted_names : dict [Hashable , str ] = {}
@@ -2323,15 +2319,16 @@ def _write_bytes(self, value: bytes) -> None:
2323
2319
"""
2324
2320
self .handles .handle .write (value ) # type: ignore[arg-type]
2325
2321
2326
- def _prepare_non_cat_value_labels (self , data : DataFrame ) -> None :
2322
+ def _prepare_non_cat_value_labels (
2323
+ self , data : DataFrame
2324
+ ) -> list [StataNonCatValueLabel ]:
2327
2325
"""
2328
2326
Check for value labels provided for non-categorical columns. Value
2329
2327
labels can only be applied to numeric columns.
2330
2328
"""
2331
- self ._has_value_labels = np .repeat (False , data .shape [1 ])
2332
- labelled_columns = []
2329
+ non_cat_value_labels : list [StataNonCatValueLabel ] = []
2333
2330
if self ._non_cat_value_labels is None :
2334
- return
2331
+ return non_cat_value_labels
2335
2332
2336
2333
for labname , labels in self ._non_cat_value_labels .items ():
2337
2334
if labname in self ._converted_names :
@@ -2352,11 +2349,8 @@ def _prepare_non_cat_value_labels(self, data: DataFrame) -> None:
2352
2349
"can only be applied to numeric columns."
2353
2350
)
2354
2351
svl = StataNonCatValueLabel (colname , labels )
2355
- self ._value_labels .append (svl )
2356
- labelled_columns .append (colname )
2357
-
2358
- has_non_cat_val_labels = data .columns .isin (labelled_columns )
2359
- self ._has_value_labels |= has_non_cat_val_labels
2352
+ non_cat_value_labels .append (svl )
2353
+ return non_cat_value_labels
2360
2354
2361
2355
def _prepare_categoricals (self , data : DataFrame ) -> DataFrame :
2362
2356
"""
@@ -2548,8 +2542,16 @@ def _prepare_pandas(self, data: DataFrame) -> None:
2548
2542
# Replace NaNs with Stata missing values
2549
2543
data = self ._replace_nans (data )
2550
2544
2545
+ # Set all columns to initially unlabelled
2546
+ self ._has_value_labels = np .repeat (False , data .shape [1 ])
2547
+
2551
2548
# Create value labels for non-categorical data
2552
- self ._prepare_non_cat_value_labels (data )
2549
+ non_cat_value_labels = self ._prepare_non_cat_value_labels (data )
2550
+
2551
+ non_cat_columns = [svl .labname for svl in non_cat_value_labels ]
2552
+ has_non_cat_val_labels = data .columns .isin (non_cat_columns )
2553
+ self ._has_value_labels |= has_non_cat_val_labels
2554
+ self ._value_labels .extend (non_cat_value_labels )
2553
2555
2554
2556
# Convert categoricals to int data, and strip labels
2555
2557
data = self ._prepare_categoricals (data )
0 commit comments