@@ -1690,7 +1690,8 @@ def _nanpercentile(values, q, axis, **kw):
1690
1690
placement = np .arange (len (result )),
1691
1691
ndim = ndim )
1692
1692
1693
- def _replace_coerce (self , mask = None , dst = None , convert = False ):
1693
+ def _replace_coerce (self , mask = None , src = None , dst = None , inplace = True ,
1694
+ convert = False , regex = False , mgr = None ):
1694
1695
"""
1695
1696
Replace value corresponding to the given boolean array with another
1696
1697
value.
@@ -1699,18 +1700,32 @@ def _replace_coerce(self, mask=None, dst=None, convert=False):
1699
1700
----------
1700
1701
mask : array_like of bool
1701
1702
The mask of values to replace.
1703
+ src : object
1704
+ The value to replace. It is ignored if regex is False.
1702
1705
dst : object
1703
1706
The value to be replaced with.
1704
1707
convert : bool
1705
- It is used in ObjectBlocks. It is here for API compatibility.
1708
+ If true, try to coerce any object types to better types.
1709
+ regex : bool
1710
+ If true, search for element matching with the pattern in src.
1711
+ Masked element is ignored.
1712
+ mgr : BlockPlacement, optional
1706
1713
1707
1714
Returns
1708
1715
-------
1709
1716
A new block if there is anything to replace or the original block.
1710
1717
"""
1718
+
1711
1719
if mask .any ():
1712
- self = self .coerce_to_target_dtype (dst )
1713
- return self .putmask (mask , dst , inplace = True )
1720
+ if not regex :
1721
+ self = self .coerce_to_target_dtype (dst )
1722
+ return self .putmask (mask , dst , inplace = inplace )
1723
+ else :
1724
+ return self ._replace_single (src , dst , inplace = inplace ,
1725
+ regex = regex ,
1726
+ convert = convert ,
1727
+ mask = mask ,
1728
+ mgr = mgr )
1714
1729
return self
1715
1730
1716
1731
@@ -2488,7 +2503,7 @@ def replace(self, to_replace, value, inplace=False, filter=None,
2488
2503
regex = regex , mgr = mgr )
2489
2504
2490
2505
def _replace_single (self , to_replace , value , inplace = False , filter = None ,
2491
- regex = False , convert = True , mgr = None ):
2506
+ regex = False , convert = True , mgr = None , mask = None ):
2492
2507
2493
2508
inplace = validate_bool_kwarg (inplace , 'inplace' )
2494
2509
@@ -2555,15 +2570,19 @@ def re_replacer(s):
2555
2570
else :
2556
2571
filt = self .mgr_locs .isin (filter ).nonzero ()[0 ]
2557
2572
2558
- new_values [filt ] = f (new_values [filt ])
2573
+ if mask is None :
2574
+ new_values [filt ] = f (new_values [filt ])
2575
+ else :
2576
+ new_values [filt ][mask ] = f (new_values [filt ][mask ])
2559
2577
2560
2578
# convert
2561
2579
block = self .make_block (new_values )
2562
2580
if convert :
2563
2581
block = block .convert (by_item = True , numeric = False )
2564
2582
return block
2565
2583
2566
- def _replace_coerce (self , mask = None , dst = None , convert = False ):
2584
+ def _replace_coerce (self , mask = None , src = None , dst = None , inplace = True ,
2585
+ convert = False , regex = False , mgr = None ):
2567
2586
"""
2568
2587
Replace value corresponding to the given boolean array with another
2569
2588
value.
@@ -2572,17 +2591,29 @@ def _replace_coerce(self, mask=None, dst=None, convert=False):
2572
2591
----------
2573
2592
mask : array_like of bool
2574
2593
The mask of values to replace.
2594
+ src : object
2595
+ The value to replace. It is ignored if regex is False.
2575
2596
dst : object
2576
2597
The value to be replaced with.
2577
2598
convert : bool
2578
2599
If true, try to coerce any object types to better types.
2600
+ regex : bool
2601
+ If true, search for element matching with the pattern in src.
2602
+ Masked element is ignored.
2603
+ mgr : BlockPlacement, optional
2579
2604
2580
2605
Returns
2581
2606
-------
2582
2607
A new block if there is anything to replace or the original block.
2583
2608
"""
2584
2609
if mask .any ():
2585
- block = super (ObjectBlock , self )._replace_coerce (mask , dst )
2610
+ block = super (ObjectBlock , self )._replace_coerce (mask = mask ,
2611
+ src = src ,
2612
+ dst = dst ,
2613
+ inplace = inplace ,
2614
+ convert = convert ,
2615
+ regex = regex ,
2616
+ mgr = mgr )
2586
2617
if convert :
2587
2618
block = [b .convert (by_item = True , numeric = False , copy = True )
2588
2619
for b in block ]
@@ -3779,17 +3810,17 @@ def replace_list(self, src_list, dest_list, inplace=False, regex=False,
3779
3810
# figure out our mask a-priori to avoid repeated replacements
3780
3811
values = self .as_array ()
3781
3812
3782
- # only support equality comparision, regex comparision support
3783
- # is needed in the future
3784
- def comp (s ):
3813
+ def comp (s , reg = False ):
3785
3814
if isna (s ):
3786
3815
return isna (values )
3787
3816
if hasattr (s , 'asm8' ):
3788
3817
return _maybe_compare (maybe_convert_objects (values ),
3789
- getattr (s , 'asm8' ), operator .eq )
3790
- return _maybe_compare (values , s , operator .eq )
3818
+ getattr (s , 'asm8' ), reg )
3819
+ if reg and is_re_compilable (s ):
3820
+ return _maybe_compare (values , s , reg )
3821
+ return _maybe_compare (values , s , reg )
3791
3822
3792
- masks = [comp (s ) for i , s in enumerate (src_list )]
3823
+ masks = [comp (s , regex ) for i , s in enumerate (src_list )]
3793
3824
3794
3825
result_blocks = []
3795
3826
src_len = len (src_list ) - 1
@@ -3801,39 +3832,16 @@ def comp(s):
3801
3832
for i , (s , d ) in enumerate (zip (src_list , dest_list )):
3802
3833
new_rb = []
3803
3834
for b in rb :
3804
- # regular expression support needs to be improved.
3805
- # If the replacement for the previous pattern
3806
- # matches the next pattern, the value will be replaced
3807
- # again with a different value from dest_list
3808
- # i.e. when values is ['a', 'b']
3809
- # src_list: [r'a*', r'b*'], dest_list: ['b', 'a']
3810
- # result will be ['b', b'] after searching for pattern r'a'
3811
- # and then changed to ['a', 'a'] for pattern r'b*'
3812
- if regex :
3813
- if is_object_dtype (b .dtype ):
3814
- convert = i == src_len
3815
- result = b .replace (s , d , inplace = inplace ,
3816
- regex = regex ,
3817
- mgr = mgr , convert = convert )
3818
- new_rb = _extend_blocks (result , new_rb )
3819
- else :
3820
- # get our mask for this element, sized to this
3821
- # particular block
3822
- m = masks [i ][b .mgr_locs .indexer ]
3823
- if m .any ():
3824
- b = b .coerce_to_target_dtype (d )
3825
- new_rb .extend (b .putmask (m , d , inplace = True ))
3826
- else :
3827
- new_rb .append (b )
3835
+ m = masks [i ][b .mgr_locs .indexer ]
3836
+ convert = i == src_len
3837
+ result = b ._replace_coerce (mask = m , src = s , dst = d ,
3838
+ inplace = inplace ,
3839
+ convert = convert , regex = regex ,
3840
+ mgr = mgr )
3841
+ if m .any ():
3842
+ new_rb = _extend_blocks (result , new_rb )
3828
3843
else :
3829
- m = masks [i ][b .mgr_locs .indexer ]
3830
- convert = i == src_len
3831
- result = b ._replace_coerce (mask = m , dst = d ,
3832
- convert = convert )
3833
- if m .any ():
3834
- new_rb = _extend_blocks (result , new_rb )
3835
- else :
3836
- new_rb .append (b )
3844
+ new_rb .append (b )
3837
3845
rb = new_rb
3838
3846
result_blocks .extend (rb )
3839
3847
@@ -5207,7 +5215,12 @@ def _vstack(to_stack, dtype):
5207
5215
return np .vstack (to_stack )
5208
5216
5209
5217
5210
- def _maybe_compare (a , b , op ):
5218
+ def _maybe_compare (a , b , regex = False ):
5219
+ if not regex :
5220
+ op = lambda x : operator .eq (x , b )
5221
+ else :
5222
+ op = np .vectorize (lambda x : bool (re .match (b , x )) if isinstance (x , str )
5223
+ else False )
5211
5224
5212
5225
is_a_array = isinstance (a , np .ndarray )
5213
5226
is_b_array = isinstance (b , np .ndarray )
@@ -5219,9 +5232,8 @@ def _maybe_compare(a, b, op):
5219
5232
# numpy deprecation warning if comparing numeric vs string-like
5220
5233
elif is_numeric_v_string_like (a , b ):
5221
5234
result = False
5222
-
5223
5235
else :
5224
- result = op (a , b )
5236
+ result = op (a )
5225
5237
5226
5238
if is_scalar (result ) and (is_a_array or is_b_array ):
5227
5239
type_names = [type (a ).__name__ , type (b ).__name__ ]
0 commit comments