@@ -665,10 +665,18 @@ def __init__(self, catarray: Series, encoding: str = "latin-1"):
665
665
categories = catarray .cat .categories
666
666
self .value_labels = list (zip (np .arange (len (categories )), categories ))
667
667
self .value_labels .sort (key = lambda x : x [0 ])
668
+
668
669
self .text_len = 0
669
670
self .txt : list [bytes ] = []
670
671
self .n = 0
672
+ self .off = np .array ([])
673
+ self .val = np .array ([])
674
+ self .len = 0
675
+
676
+ self ._prepare_value_labels ()
671
677
678
+ def _prepare_value_labels (self ):
679
+ """ Encode value labels. """
672
680
# Compute lengths and setup lists of offsets and labels
673
681
offsets : list [int ] = []
674
682
values : list [int ] = []
@@ -677,10 +685,10 @@ def __init__(self, catarray: Series, encoding: str = "latin-1"):
677
685
if not isinstance (category , str ):
678
686
category = str (category )
679
687
warnings .warn (
680
- value_label_mismatch_doc .format (catarray . name ),
688
+ value_label_mismatch_doc .format (self . labname ),
681
689
ValueLabelTypeMismatch ,
682
690
)
683
- category = category .encode (encoding )
691
+ category = category .encode (self . _encoding )
684
692
offsets .append (self .text_len )
685
693
self .text_len += len (category ) + 1 # +1 for the padding
686
694
values .append (vl [0 ])
@@ -785,37 +793,11 @@ def __init__(
785
793
self .text_len = 0
786
794
self .txt : list [bytes ] = []
787
795
self .n = 0
796
+ self .off = np .array ([])
797
+ self .val = np .array ([])
798
+ self .len = 0
788
799
789
- # Compute lengths and setup lists of offsets and labels
790
- offsets : list [int ] = []
791
- values : list [int ] = []
792
- for vl in self .value_labels :
793
- category = vl [1 ]
794
- if not isinstance (category , str ):
795
- category = str (category )
796
- warnings .warn (
797
- value_label_mismatch_doc .format (labname ),
798
- ValueLabelTypeMismatch ,
799
- )
800
- category = category .encode (encoding )
801
- offsets .append (self .text_len )
802
- self .text_len += len (category ) + 1 # +1 for the padding
803
- values .append (vl [0 ])
804
- self .txt .append (category )
805
- self .n += 1
806
-
807
- if self .text_len > 32000 :
808
- raise ValueError (
809
- "Stata value labels for a single variable must "
810
- "have a combined length less than 32,000 characters."
811
- )
812
-
813
- # Ensure int32
814
- self .off = np .array (offsets , dtype = np .int32 )
815
- self .val = np .array (values , dtype = np .int32 )
816
-
817
- # Total length
818
- self .len = 4 + 4 + 4 * self .n + 4 * self .n + self .text_len
800
+ self ._prepare_value_labels ()
819
801
820
802
821
803
class StataMissingValue :
0 commit comments