Skip to content

REF: move most of Block.interpolate to an array method #40671

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Apr 2, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
120 changes: 13 additions & 107 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -1106,128 +1106,34 @@ def interpolate(
# If there are no NAs, then interpolate is a no-op
return [self] if inplace else [self.copy()]

# a fill na type method
try:
m = missing.clean_fill_method(method)
except ValueError:
m = None
if m is None and self.dtype.kind != "f":
# only deal with floats
# bc we already checked that can_hold_na, we dont have int dtype here
# TODO: make a copy if not inplace?
return [self]

if m is not None:
if fill_value is not None:
# similar to validate_fillna_kwargs
raise ValueError("Cannot pass both fill_value and method")

return self._interpolate_with_fill(
method=m,
axis=axis,
inplace=inplace,
limit=limit,
limit_area=limit_area,
downcast=downcast,
)
# validate the interp method
m = missing.clean_interp_method(method, **kwargs)

assert index is not None # for mypy
data = self.values if inplace else self.values.copy()
data = cast(np.ndarray, data) # bc overridden by ExtensionBlock

return self._interpolate(
method=m,
index=index,
interp_values = missing.interpolate_array_2d(
data,
method=method,
axis=axis,
index=index,
limit=limit,
limit_direction=limit_direction,
limit_area=limit_area,
fill_value=fill_value,
inplace=inplace,
downcast=downcast,
**kwargs,
)

@final
def _interpolate_with_fill(
    self,
    method: str = "pad",
    axis: int = 0,
    inplace: bool = False,
    limit: Optional[int] = None,
    limit_area: Optional[str] = None,
    downcast: Optional[str] = None,
) -> List[Block]:
    """
    Fill missing values via ``missing.interpolate_2d`` (fillna-style).

    Parameters
    ----------
    method : str, default "pad"
        Forwarded unchanged to ``missing.interpolate_2d``.
    axis : int, default 0
        Forwarded unchanged to ``missing.interpolate_2d``.
    inplace : bool, default False
        Validated with ``validate_bool_kwarg``. When True the block's own
        ``values`` are handed to the fill routine; otherwise a copy is.
    limit : int, optional
        Forwarded unchanged to ``missing.interpolate_2d``.
    limit_area : str, optional
        Forwarded unchanged to ``missing.interpolate_2d``.
    downcast : str, optional
        Forwarded to ``self._maybe_downcast`` on the resulting block.

    Returns
    -------
    List[Block]
        Whatever ``self._maybe_downcast`` returns for the single new block.
    """
    inplace = validate_bool_kwarg(inplace, "inplace")

    assert self._can_hold_na  # checked by caller

    # Work on the block's array directly only when the caller asked for it.
    if inplace:
        target = self.values
    else:
        target = self.values.copy()

    filled = missing.interpolate_2d(
        target,
        method=method,
        axis=axis,
        limit=limit,
        limit_area=limit_area,
    )

    coerced = maybe_coerce_values(filled)
    new_blocks = [self.make_block_same_class(coerced)]
    return self._maybe_downcast(new_blocks, downcast)

@final
def _interpolate(
    self,
    method: str,
    index: Index,
    fill_value: Optional[Any] = None,
    axis: int = 0,
    limit: Optional[int] = None,
    limit_direction: str = "forward",
    limit_area: Optional[str] = None,
    inplace: bool = False,
    downcast: Optional[str] = None,
    **kwargs,
) -> List[Block]:
    """
    Interpolate this block's values slice-by-slice with ``interpolate_1d``.

    Parameters
    ----------
    method : str
        Interpolation method name, forwarded to ``missing.interpolate_1d``.
        For "krogh", "piecewise_polynomial" and "pchip" the index must be
        monotonic.
    index : Index
        Passed to ``missing.interpolate_1d`` as ``xvalues``.
    fill_value : object, optional
        Replaced by ``self.fill_value`` when ``is_valid_na_for_dtype``
        accepts it for this block's dtype; otherwise forwarded as given.
    axis : int, default 0
        Axis handed to ``np.apply_along_axis``.
    limit, limit_direction, limit_area
        Forwarded unchanged to ``missing.interpolate_1d``.
    inplace : bool, default False
        Validated with ``validate_bool_kwarg``. When False a copy of
        ``self.values`` is interpolated instead of the original array.
    downcast : str, optional
        Forwarded to ``self._maybe_downcast`` on the resulting block.
    **kwargs
        Extra keyword arguments forwarded to ``missing.interpolate_1d``.

    Returns
    -------
    List[Block]
        ``[self]`` untouched when the dtype kind is not "f"; otherwise the
        result of ``self._maybe_downcast`` on the interpolated block.

    Raises
    ------
    ValueError
        If ``method`` requires a monotonic index and ``index`` is not.
    """
    inplace = validate_bool_kwarg(inplace, "inplace")
    if inplace:
        arr = self.values
    else:
        arr = self.values.copy()

    # only deal with floats
    # bc we already checked that can_hold_na, we don't have int dtype here
    if self.dtype.kind != "f":
        return [self]

    if is_valid_na_for_dtype(fill_value, self.dtype):
        fill_value = self.fill_value

    needs_monotonic_index = method in ("krogh", "piecewise_polynomial", "pchip")
    if needs_monotonic_index and not index.is_monotonic:
        raise ValueError(
            f"{method} interpolation requires that the index be monotonic."
        )

    def _interp_slice(column: np.ndarray) -> np.ndarray:
        # Interpolate a single 1-d slice taken along ``axis``.
        # should the axis argument be handled below in apply_along_axis?
        # i.e. not an arg to missing.interpolate_1d
        return missing.interpolate_1d(
            xvalues=index,
            yvalues=column,
            method=method,
            limit=limit,
            limit_direction=limit_direction,
            limit_area=limit_area,
            fill_value=fill_value,
            bounds_error=False,
            **kwargs,
        )

    # interp each column independently
    result = np.apply_along_axis(_interp_slice, axis, arr)
    result = maybe_coerce_values(result)

    new_blocks = [self.make_block_same_class(result)]
    return self._maybe_downcast(new_blocks, downcast)
nbs = [self.make_block_same_class(interp_values)]
return self._maybe_downcast(nbs, downcast)

def take_nd(
self,
Expand Down
112 changes: 109 additions & 3 deletions pandas/core/missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,11 @@
is_numeric_v_string_like,
needs_i8_conversion,
)
from pandas.core.dtypes.missing import isna
from pandas.core.dtypes.missing import (
is_valid_na_for_dtype,
isna,
na_value_for_dtype,
)

if TYPE_CHECKING:
from pandas import Index
Expand Down Expand Up @@ -145,7 +149,7 @@ def clean_fill_method(method, allow_nearest: bool = False):
]


def clean_interp_method(method: str, **kwargs) -> str:
def clean_interp_method(method: str, index: Index, **kwargs) -> str:
order = kwargs.get("order")

if method in ("spline", "polynomial") and order is None:
Expand All @@ -155,6 +159,12 @@ def clean_interp_method(method: str, **kwargs) -> str:
if method not in valid:
raise ValueError(f"method must be one of {valid}. Got '{method}' instead.")

if method in ("krogh", "piecewise_polynomial", "pchip"):
if not index.is_monotonic:
raise ValueError(
f"{method} interpolation requires that the index be monotonic."
)

return method


Expand Down Expand Up @@ -195,6 +205,102 @@ def find_valid_index(values, *, how: str) -> Optional[int]:
return idxpos


def interpolate_array_2d(
    data: np.ndarray,
    method: str = "pad",
    axis: int = 0,
    index: Optional[Index] = None,
    limit: Optional[int] = None,
    limit_direction: str = "forward",
    limit_area: Optional[str] = None,
    fill_value: Optional[Any] = None,
    coerce: bool = False,
    downcast: Optional[str] = None,
    **kwargs,
):
    """
    Dispatch to ``interpolate_2d`` or ``interpolate_2d_with_fill``.

    ``method`` is first offered to ``clean_fill_method``. If that yields a
    method, ``interpolate_2d`` does the work; if it raises ``ValueError`` or
    yields None, the request goes to ``interpolate_2d_with_fill`` instead.

    Parameters
    ----------
    data : np.ndarray
        Array handed to the selected routine.
    method : str, default "pad"
        Fill method or interpolation method name.
    axis : int, default 0
        Forwarded to the selected routine.
    index : Index, optional
        Only used on the ``interpolate_2d_with_fill`` path, where it must
        not be None.
    limit, limit_area
        Forwarded to the selected routine.
    limit_direction : str, default "forward"
        Only forwarded on the ``interpolate_2d_with_fill`` path.
    fill_value : object, optional
        Must be None when ``method`` is a fill method; otherwise forwarded
        to ``interpolate_2d_with_fill``.
    coerce, downcast
        Accepted but not referenced in this function's body.
    **kwargs
        Only forwarded on the ``interpolate_2d_with_fill`` path.

    Returns
    -------
    The value returned by the selected routine.

    Raises
    ------
    ValueError
        If ``method`` is a fill method and ``fill_value`` is not None.
    """
    try:
        fill_method = clean_fill_method(method)
    except ValueError:
        # Not a recognised fill method; treat it as an interpolation method.
        fill_method = None

    if fill_method is None:
        assert index is not None  # for mypy

        return interpolate_2d_with_fill(
            data=data,
            index=index,
            axis=axis,
            method=method,
            limit=limit,
            limit_direction=limit_direction,
            limit_area=limit_area,
            fill_value=fill_value,
            **kwargs,
        )

    if fill_value is not None:
        # similar to validate_fillna_kwargs
        raise ValueError("Cannot pass both fill_value and method")

    return interpolate_2d(
        data,
        method=fill_method,
        axis=axis,
        limit=limit,
        limit_area=limit_area,
    )


def interpolate_2d_with_fill(
    data: np.ndarray,  # floating dtype
    index: Index,
    axis: int,
    method: str = "linear",
    limit: Optional[int] = None,
    limit_direction: str = "forward",
    limit_area: Optional[str] = None,
    fill_value: Optional[Any] = None,
    **kwargs,
) -> np.ndarray:
    """
    Apply ``interpolate_1d`` to each 1-d slice of ``data`` along ``axis``.

    Parameters
    ----------
    data : np.ndarray
        Array whose slices are interpolated.
    index : Index
        Passed to ``interpolate_1d`` as ``xvalues`` and to
        ``clean_interp_method`` for validation.
    axis : int
        Axis handed to ``np.apply_along_axis``.
    method : str, default "linear"
        Interpolation method; validated by ``clean_interp_method``.
    limit, limit_direction, limit_area
        Forwarded unchanged to ``interpolate_1d``.
    fill_value : object, optional
        Replaced by ``na_value_for_dtype(data.dtype, compat=False)`` when
        ``is_valid_na_for_dtype`` accepts it for ``data.dtype``.
    **kwargs
        Forwarded to both ``clean_interp_method`` and ``interpolate_1d``.

    Returns
    -------
    np.ndarray
        Result of ``np.apply_along_axis`` over the per-slice interpolation.

    Notes
    -----
    The signature differs from interpolate_1d because it only includes
    what is needed for Block.interpolate.
    """
    # validate the interp method
    clean_interp_method(method, index, **kwargs)

    fill_value = (
        na_value_for_dtype(data.dtype, compat=False)
        if is_valid_na_for_dtype(fill_value, data.dtype)
        else fill_value
    )

    def _interp_slice(column: np.ndarray) -> np.ndarray:
        # Interpolate one 1-d slice taken along ``axis`` and return it.
        # should the axis argument be handled below in apply_along_axis?
        # i.e. not an arg to interpolate_1d
        return interpolate_1d(
            xvalues=index,
            yvalues=column,
            method=method,
            limit=limit,
            limit_direction=limit_direction,
            limit_area=limit_area,
            fill_value=fill_value,
            bounds_error=False,
            **kwargs,
        )

    # interp each column independently
    return np.apply_along_axis(_interp_slice, axis, data)


def interpolate_1d(
xvalues: Index,
yvalues: np.ndarray,
Expand Down Expand Up @@ -638,7 +744,7 @@ def interpolate_2d(
Perform an actual interpolation of values. Values will be made 2-d if
needed; fills inplace and returns the result.

Parameters
Parameters
----------
values: array-like
Input array.
Expand Down