-
-
Notifications
You must be signed in to change notification settings - Fork 18.6k
FIX: fix interpolate with kwarg limit area and limit direction using pad or bfill #31048
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 12 commits
9afe992
3a191b9
fd5d8e8
26d88ed
6597aca
c536d3c
ed9cf21
2980325
ecf428e
f8a3423
6733186
c5b77d2
0bb36de
a467afd
5466d8c
3e968fc
556a3cf
6c1e429
767b0ca
b82aaff
26ef7b5
b4b6b5a
e259549
8ceff58
7c5ad7d
92148ff
d62e02e
c2473f2
610e347
570e3c2
721304a
73ab1bf
a33f629
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6527,9 +6527,11 @@ def replace( | |
0. | ||
inplace : bool, default False | ||
Update the data in place if possible. | ||
limit_direction : {'forward', 'backward', 'both'}, default 'forward' | ||
limit_direction : {'forward', 'backward', 'both'}, default is None | ||
cchwala marked this conversation as resolved.
Show resolved
Hide resolved
|
||
If limit is specified, consecutive NaNs will be filled in this | ||
direction. | ||
direction. If the methods 'pad' or 'ffill' are used it must be | ||
None or 'forward'. If 'backfill' or 'bfill' are used it must be | ||
None or 'backward'. | ||
limit_area : {`None`, 'inside', 'outside'}, default None | ||
If limit is specified, consecutive NaNs will be filled with this | ||
restriction. | ||
|
@@ -6680,7 +6682,7 @@ def interpolate( | |
axis=0, | ||
limit=None, | ||
inplace=False, | ||
limit_direction="forward", | ||
limit_direction=None, | ||
cchwala marked this conversation as resolved.
Show resolved
Hide resolved
|
||
limit_area=None, | ||
downcast=None, | ||
**kwargs, | ||
|
@@ -6720,6 +6722,28 @@ def interpolate( | |
"column to a numeric dtype." | ||
) | ||
|
||
# Set `limit_direction` depending on `method` | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. is there a reason this logic does not simply belong in interpolate_2d? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. or call something like clean_fill_method There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
My idea was to detect conflicting user input, e.g. But I can move the logic to a function like There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. interpolate_2d is also called from fillna. only NDFrame.interpolate allows limit_direction so it does make sense to validate here. clean_fill_method checks method. probably want to avoid passing more parameters around. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. pls make a method similar to clean_fill_method then, i.e. this logic should live in pandas/core/missing.py There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. not sure what to do here. after merge with master, there is similar code at this location, but it is not mine... |
||
if (method == "pad") or (method == "ffill"): | ||
if (limit_direction == "backward") or (limit_direction == "both"): | ||
raise ValueError( | ||
f"`limit_direction` must not be `{limit_direction}` " | ||
f"for method `{method}`" | ||
) | ||
else: | ||
limit_direction = "forward" | ||
elif (method == "backfill") or (method == "bfill"): | ||
if (limit_direction == "forward") or (limit_direction == "both"): | ||
raise ValueError( | ||
f"`limit_direction` must not be `{limit_direction}` " | ||
f"for method `{method}`" | ||
) | ||
else: | ||
limit_direction = "backward" | ||
else: | ||
# Set default | ||
if limit_direction is None: | ||
limit_direction = "forward" | ||
|
||
# create/use the index | ||
if method == "linear": | ||
# prior default | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,6 +5,7 @@ | |
import numpy as np | ||
|
||
from pandas._libs import algos, lib | ||
from pandas._typing import Dtype, Hashable, Optional | ||
from pandas.compat._optional import import_optional_dependency | ||
|
||
from pandas.core.dtypes.cast import infer_dtype_from_array | ||
|
@@ -222,40 +223,14 @@ def interpolate_1d( | |
# default limit is unlimited GH #16282 | ||
limit = algos._validate_limit(nobs=None, limit=limit) | ||
|
||
# These are sets of index pointers to invalid values... i.e. {0, 1, etc... | ||
all_nans = set(np.flatnonzero(invalid)) | ||
start_nans = set(range(find_valid_index(yvalues, "first"))) | ||
end_nans = set(range(1 + find_valid_index(yvalues, "last"), len(valid))) | ||
mid_nans = all_nans - start_nans - end_nans | ||
|
||
# Like the sets above, preserve_nans contains indices of invalid values, | ||
# but in this case, it is the final set of indices that need to be | ||
# preserved as NaN after the interpolation. | ||
|
||
# For example if limit_direction='forward' then preserve_nans will | ||
# contain indices of NaNs at the beginning of the series, and NaNs that | ||
# are more than 'limit' away from the prior non-NaN. | ||
|
||
# set preserve_nans based on direction using _interp_limit | ||
if limit_direction == "forward": | ||
preserve_nans = start_nans | set(_interp_limit(invalid, limit, 0)) | ||
elif limit_direction == "backward": | ||
preserve_nans = end_nans | set(_interp_limit(invalid, 0, limit)) | ||
else: | ||
# both directions... just use _interp_limit | ||
preserve_nans = set(_interp_limit(invalid, limit, limit)) | ||
|
||
# if limit_area is set, add either mid or outside indices | ||
# to preserve_nans GH #16284 | ||
if limit_area == "inside": | ||
# preserve NaNs on the outside | ||
preserve_nans |= start_nans | end_nans | ||
elif limit_area == "outside": | ||
# preserve NaNs on the inside | ||
preserve_nans |= mid_nans | ||
|
||
# sort preserve_nans and convert to list | ||
preserve_nans = sorted(preserve_nans) | ||
preserve_nans = _derive_indices_of_nans_to_preserve( | ||
yvalues=yvalues, | ||
valid=valid, | ||
invalid=invalid, | ||
limit=limit, | ||
limit_area=limit_area, | ||
limit_direction=limit_direction, | ||
) | ||
|
||
xvalues = getattr(xvalues, "values", xvalues) | ||
yvalues = getattr(yvalues, "values", yvalues) | ||
|
@@ -314,6 +289,53 @@ def interpolate_1d( | |
return result | ||
|
||
|
||
def _derive_indices_of_nans_to_preserve(
    yvalues, valid, invalid, limit, limit_area, limit_direction
):
    """
    Work out which NaN positions must still be NaN after interpolation.

    The arguments carry the same meaning as the identically named locals of
    `interpolate_1d`, which calls this helper and afterwards uses the
    returned positions to put NaNs back where they have to stay.

    Parameters
    ----------
    yvalues : array-like
        Data being interpolated; only used to locate the first and last
        valid entry via `find_valid_index`.
    valid, invalid : boolean array-like
        Masks of the valid / missing entries of `yvalues`.
    limit : int
        Forwarded to `_interp_limit` as the forward and/or backward limit.
    limit_area : {None, 'inside', 'outside'}
        'inside' additionally preserves leading and trailing NaNs,
        'outside' additionally preserves the NaNs in between.
    limit_direction : {'forward', 'backward', 'both'}
        Any value other than 'forward' or 'backward' is treated as 'both'.

    Returns
    -------
    list
        Sorted positions of the NaNs to preserve.
    """
    # Position sets of missing values: all of them, the run before the first
    # valid entry, the run after the last valid entry, and everything else.
    nan_positions = set(np.flatnonzero(invalid))
    leading = set(range(find_valid_index(yvalues, "first")))
    trailing = set(range(1 + find_valid_index(yvalues, "last"), len(valid)))
    interior = nan_positions - leading - trailing

    # Direction handling: e.g. for 'forward', the leading NaNs are kept
    # together with whatever `_interp_limit` reports for the given limit.
    if limit_direction == "forward":
        keep = leading.union(_interp_limit(invalid, limit, 0))
    elif limit_direction == "backward":
        keep = trailing.union(_interp_limit(invalid, 0, limit))
    else:
        # both directions: `_interp_limit` alone decides
        keep = set(_interp_limit(invalid, limit, limit))

    # Area handling (GH #16284): extend the kept set by the region that the
    # caller asked not to fill.
    if limit_area == "inside":
        keep.update(leading, trailing)
    elif limit_area == "outside":
        keep.update(interior)

    return sorted(keep)
|
||
|
||
def _interpolate_scipy_wrapper( | ||
x, y, new_x, method, fill_value=None, bounds_error=False, order=None, **kwargs | ||
): | ||
|
@@ -478,6 +500,66 @@ def _akima_interpolate(xi, yi, x, der=0, axis=0): | |
return [P(x, nu) for nu in der] | ||
|
||
|
||
def interpolate_1d_fill(
    values,
    method: str = "pad",
    limit: Optional[int] = None,
    limit_area: Optional[str] = None,
    fill_value: Optional[Hashable] = None,
    dtype: Optional[Dtype] = None,
):
    """
    Pad or backfill a 1D array while honouring ``limit_area``.

    This is a 1D version of `interpolate_2d`, used for the methods ``pad``
    and ``backfill`` when interpolating. The 1D version is needed so that the
    kwarg `limit_area` can be handled via
    `_derive_indices_of_nans_to_preserve`. It is meant to be used the same
    way as the scipy-based 1D interpolation functions, i.e. via
    ``np.apply_along_axis``.

    Parameters
    ----------
    values : array with ``ndim`` and ``dtype`` attributes
        The 1D data to fill.
    method : {'pad', 'ffill', 'backfill', 'bfill'}, default 'pad'
        Fill method. 'pad'/'ffill' imply a forward limit direction,
        'backfill'/'bfill' a backward one.
    limit : int, optional
        Passed on to the fill routine and to the preserve-NaN computation.
    limit_area : {None, 'inside', 'outside'}, optional
        Passed on to `_derive_indices_of_nans_to_preserve`.
    fill_value : hashable, optional
        If given, a mask is built with `mask_missing(values, fill_value)`;
        it is also the value written back at the preserved positions.
    dtype : optional
        Passed on to `pad_1d` / `backfill_1d`.

    Returns
    -------
    The filled values, with the positions selected by `limit_area` and
    `limit` reset to `fill_value`.

    Raises
    ------
    ValueError
        If `method` is not one of the supported fill methods.
    AssertionError
        If `values` has more than one dimension.
    """
    # Accept the same aliases that the caller-side validation in
    # `interpolate` accepts, instead of rejecting 'ffill'/'bfill' here.
    if method in ("pad", "ffill"):
        limit_direction = "forward"
    elif method in ("backfill", "bfill"):
        limit_direction = "backward"
    else:
        raise ValueError("`method` must be either 'pad' or 'backfill'.")

    # Reject unsupported input before doing any work on it.
    if values.ndim > 1:
        raise AssertionError("This only works with 1D data.")

    # Remember the incoming dtype; it is needed after the fill to restore
    # datetime64 data.
    orig_dtype = values.dtype

    invalid = isna(values)
    valid = ~invalid

    if fill_value is None:
        mask = None
    else:  # todo create faster fill func without masking
        mask = mask_missing(values, fill_value)

    preserve_nans = _derive_indices_of_nans_to_preserve(
        yvalues=values,
        valid=valid,
        invalid=invalid,
        limit=limit,
        limit_area=limit_area,
        limit_direction=limit_direction,
    )

    # Kept for its validation of `method`; the fill routine itself is chosen
    # from `limit_direction`, so dispatch does not depend on how aliases are
    # normalised. NOTE(review): presumably maps 'ffill'/'bfill' onto
    # 'pad'/'backfill' - confirm against clean_fill_method.
    clean_fill_method(method)
    if limit_direction == "forward":
        values = pad_1d(values, limit=limit, mask=mask, dtype=dtype)
    else:
        values = backfill_1d(values, limit=limit, mask=mask, dtype=dtype)

    if orig_dtype.kind == "M":
        # convert float back to datetime64
        values = values.astype(orig_dtype)

    # Undo the fill at every position that must not be filled.
    values[preserve_nans] = fill_value
    return values
|
||
|
||
def interpolate_2d( | ||
values, method="pad", axis=0, limit=None, fill_value=None, dtype=None | ||
): | ||
|
Uh oh!
There was an error while loading. Please reload this page.