pandas-dev · jreback · Jan 27, 2022 · Jan 17, 2022 · Jan 24, 2022 · Jan 24, 2022
diff --git a/pandas/core/arrays/floating.py b/pandas/core/arrays/floating.py
@@ -2,20 +2,9 @@
 
 import numpy as np
 
-from pandas._libs import (
-    lib,
-    missing as libmissing,
-)
 from pandas._typing import DtypeObj
 from pandas.util._decorators import cache_readonly
 
-from pandas.core.dtypes.common import (
-    is_bool_dtype,
-    is_float_dtype,
-    is_integer_dtype,
-    is_object_dtype,
-    is_string_dtype,
-)
 from pandas.core.dtypes.dtypes import register_extension_dtype
 
 from pandas.core.arrays.numeric import (
@@ -34,6 +23,8 @@ class FloatingDtype(NumericDtype):
     The attributes name & type are set when these subclasses are created.
     """
 
+    _default_np_dtype = np.dtype(np.float64)
+
     def __repr__(self) -> str:
         return f"{self.name}Dtype()"
 
@@ -66,31 +57,8 @@ def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None:
             return FLOAT_STR_TO_DTYPE[str(np_dtype)]
         return None
 
-
-def coerce_to_array(
-    values, dtype=None, mask=None, copy: bool = False
-) -> tuple[np.ndarray, np.ndarray]:
-    """
-    Coerce the input values array to numpy arrays with a mask.
-
-    Parameters
-    ----------
-    values : 1D list-like
-    dtype : float dtype
-    mask : bool 1D array, optional
-    copy : bool, default False
-        if True, copy the input
-
-    Returns
-    -------
-    tuple of (values, mask)
-    """
-    # if values is floating numpy array, preserve its dtype
-    if dtype is None and hasattr(values, "dtype"):
-        if is_float_dtype(values.dtype):
-            dtype = values.dtype
-
-    if dtype is not None:
+    @classmethod
+    def _standardize_dtype(cls, dtype) -> FloatingDtype:
         if isinstance(dtype, str) and dtype.startswith("Float"):
             # Avoid DeprecationWarning from NumPy about np.dtype("Float64")
             # https://github.com/numpy/numpy/pull/7476
@@ -101,60 +69,18 @@ def coerce_to_array(
                 dtype = FLOAT_STR_TO_DTYPE[str(np.dtype(dtype))]
             except KeyError as err:
                 raise ValueError(f"invalid dtype specified {dtype}") from err
+        return dtype
 
-    if isinstance(values, FloatingArray):
-        values, mask = values._data, values._mask
-        if dtype is not None:
-            values = values.astype(dtype.numpy_dtype, copy=False)
-
-        if copy:
-            values = values.copy()
-            mask = mask.copy()
-        return values, mask
-
-    values = np.array(values, copy=copy)
-    if is_object_dtype(values.dtype) or is_string_dtype(values.dtype):
-        inferred_type = lib.infer_dtype(values, skipna=True)
-        if inferred_type == "empty":
-            pass
-        elif inferred_type == "boolean":
-            raise TypeError(f"{values.dtype} cannot be converted to a FloatingDtype")
-
-    elif is_bool_dtype(values) and is_float_dtype(dtype):
-        values = np.array(values, dtype=float, copy=copy)
-
-    elif not (is_integer_dtype(values) or is_float_dtype(values)):
-        raise TypeError(f"{values.dtype} cannot be converted to a FloatingDtype")
-
-    if values.ndim != 1:
-        raise TypeError("values must be a 1D list-like")
-
-    if mask is None:
-        mask = libmissing.is_numeric_na(values)
-
-    else:
-        assert len(mask) == len(values)
-
-    if not mask.ndim == 1:
-        raise TypeError("mask must be a 1D list-like")
-
-    # infer dtype if needed
-    if dtype is None:
-        dtype = np.dtype("float64")
-    else:
-        dtype = dtype.type
-
-    # if we are float, let's make sure that we can
-    # safely cast
-
-    # we copy as need to coerce here
-    # TODO should this be a safe cast?
-    if mask.any():
-        values = values.copy()
-        values[mask] = np.nan
-    values = values.astype(dtype, copy=False)  # , casting="safe")
+    @classmethod
+    def _safe_cast(cls, values: np.ndarray, dtype: np.dtype, copy: bool) -> np.ndarray:
+        """
+        Safely cast the values to the given dtype.
 
-    return values, mask
+        "safe" in this context means the casting is lossless.
+        """
+        # No losslessness check is performed here: this is a plain astype that
+        # exists only to mirror the IntegerDtype._safe_cast interface.
+        return values.astype(dtype, copy=copy)
 
 
 class FloatingArray(NumericArray):
@@ -217,8 +143,10 @@ class FloatingArray(NumericArray):
     Length: 3, dtype: Float32
     """
 
+    _dtype_cls = FloatingDtype
+
     # The value used to fill '_data' to avoid upcasting
-    _internal_fill_value = 0.0
+    _internal_fill_value = np.nan
     # Fill values used for any/all
     _truthy_value = 1.0
     _falsey_value = 0.0
@@ -239,12 +167,6 @@ def __init__(self, values: np.ndarray, mask: np.ndarray, copy: bool = False):
 
         super().__init__(values, mask, copy=copy)
 
-    @classmethod
-    def _coerce_to_array(
-        cls, value, *, dtype: DtypeObj, copy: bool = False
-    ) -> tuple[np.ndarray, np.ndarray]:
-        return coerce_to_array(value, dtype=dtype, copy=copy)
-
 
 _dtype_docstring = """
 An ExtensionDtype for {dtype} data.

diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py
@@ -2,21 +2,10 @@
 
 import numpy as np
 
-from pandas._libs import (
-    lib,
-    missing as libmissing,
-)
 from pandas._typing import DtypeObj
 from pandas.util._decorators import cache_readonly
 
 from pandas.core.dtypes.base import register_extension_dtype
-from pandas.core.dtypes.common import (
-    is_bool_dtype,
-    is_float_dtype,
-    is_integer_dtype,
-    is_object_dtype,
-    is_string_dtype,
-)
 
 from pandas.core.arrays.masked import BaseMaskedDtype
 from pandas.core.arrays.numeric import (
@@ -35,6 +24,8 @@ class _IntegerDtype(NumericDtype):
     The attributes name & type are set when these subclasses are created.
     """
 
+    _default_np_dtype = np.dtype(np.int64)
+
     def __repr__(self) -> str:
         sign = "U" if self.is_unsigned_integer else ""
         return f"{sign}Int{8 * self.itemsize}Dtype()"
@@ -94,49 +85,8 @@ def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None:
             return FLOAT_STR_TO_DTYPE[str(np_dtype)]
         return None
 
-
-def safe_cast(values, dtype, copy: bool):
-    """
-    Safely cast the values to the dtype if they
-    are equivalent, meaning floats must be equivalent to the
-    ints.
-    """
-    try:
-        return values.astype(dtype, casting="safe", copy=copy)
-    except TypeError as err:
-        casted = values.astype(dtype, copy=copy)
-        if (casted == values).all():
-            return casted
-
-        raise TypeError(
-            f"cannot safely cast non-equivalent {values.dtype} to {np.dtype(dtype)}"
-        ) from err
-
-
-def coerce_to_array(
-    values, dtype, mask=None, copy: bool = False
-) -> tuple[np.ndarray, np.ndarray]:
-    """
-    Coerce the input values array to numpy arrays with a mask.
-
-    Parameters
-    ----------
-    values : 1D list-like
-    dtype : integer dtype
-    mask : bool 1D array, optional
-    copy : bool, default False
-        if True, copy the input
-
-    Returns
-    -------
-    tuple of (values, mask)
-    """
-    # if values is integer numpy array, preserve its dtype
-    if dtype is None and hasattr(values, "dtype"):
-        if is_integer_dtype(values.dtype):
-            dtype = values.dtype
-
-    if dtype is not None:
+    @classmethod
+    def _standardize_dtype(cls, dtype) -> _IntegerDtype:
         if isinstance(dtype, str) and (
             dtype.startswith("Int") or dtype.startswith("UInt")
         ):
@@ -149,64 +99,26 @@ def coerce_to_array(
                 dtype = INT_STR_TO_DTYPE[str(np.dtype(dtype))]
             except KeyError as err:
                 raise ValueError(f"invalid dtype specified {dtype}") from err
+        return dtype
+
+    @classmethod
+    def _safe_cast(cls, values: np.ndarray, dtype: np.dtype, copy: bool) -> np.ndarray:
+        """
+        Safely cast the values to the given dtype.
+
+        "safe" in this context means the casting is lossless. e.g. if 'values'
+        has a floating dtype, each value must be an integer.
+        """
+        try:
+            return values.astype(dtype, casting="safe", copy=copy)
+        except TypeError as err:
+            casted = values.astype(dtype, copy=copy)
+            if (casted == values).all():
+                return casted
 
-    if isinstance(values, IntegerArray):
-        values, mask = values._data, values._mask
-        if dtype is not None:
-            values = values.astype(dtype.numpy_dtype, copy=False)
-
-        if copy:
-            values = values.copy()
-            mask = mask.copy()
-        return values, mask
-
-    values = np.array(values, copy=copy)
-    inferred_type = None
-    if is_object_dtype(values.dtype) or is_string_dtype(values.dtype):
-        inferred_type = lib.infer_dtype(values, skipna=True)
-        if inferred_type == "empty":
-            pass
-        elif inferred_type == "boolean":
-            raise TypeError(f"{values.dtype} cannot be converted to a FloatingDtype")
-
-    elif is_bool_dtype(values) and is_integer_dtype(dtype):
-        values = np.array(values, dtype=int, copy=copy)
-
-    elif not (is_integer_dtype(values) or is_float_dtype(values)):
-        raise TypeError(f"{values.dtype} cannot be converted to an IntegerDtype")
-
-    if values.ndim != 1:
-        raise TypeError("values must be a 1D list-like")
-
-    if mask is None:
-        mask = libmissing.is_numeric_na(values)
-    else:
-        assert len(mask) == len(values)
-
-    if mask.ndim != 1:
-        raise TypeError("mask must be a 1D list-like")
-
-    # infer dtype if needed
-    if dtype is None:
-        dtype = np.dtype("int64")
-    else:
-        dtype = dtype.type
-
-    # if we are float, let's make sure that we can
-    # safely cast
-
-    # we copy as need to coerce here
-    if mask.any():
-        values = values.copy()
-        values[mask] = 1
-    if inferred_type in ("string", "unicode"):
-        # casts from str are always safe since they raise
-        # a ValueError if the str cannot be parsed into an int
-        values = values.astype(dtype, copy=copy)
-    else:
-        values = safe_cast(values, dtype, copy=False)
-
-    return values, mask
+            raise TypeError(
+                f"cannot safely cast non-equivalent {values.dtype} to {np.dtype(dtype)}"
+            ) from err
 
 
 class IntegerArray(NumericArray):
@@ -277,6 +189,8 @@ class IntegerArray(NumericArray):
     Length: 3, dtype: UInt16
     """
 
+    _dtype_cls = _IntegerDtype
+
     # The value used to fill '_data' to avoid upcasting
     _internal_fill_value = 1
     # Fill values used for any/all
@@ -295,12 +209,6 @@ def __init__(self, values: np.ndarray, mask: np.ndarray, copy: bool = False):
             )
         super().__init__(values, mask, copy=copy)
 
-    @classmethod
-    def _coerce_to_array(
-        cls, value, *, dtype: DtypeObj, copy: bool = False
-    ) -> tuple[np.ndarray, np.ndarray]:
-        return coerce_to_array(value, dtype=dtype, copy=copy)
-
 
 _dtype_docstring = """
 An ExtensionDtype for {dtype} integer data.