pandas-dev · jreback · Apr 13, 2021 · Mar 23, 2021 · Mar 24, 2021 · Mar 25, 2021
diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py
@@ -27,6 +27,7 @@
 from pandas.util._validators import validate_fillna_kwargs
 
 from pandas.core.dtypes.common import is_dtype_equal
+from pandas.core.dtypes.dtypes import ExtensionDtype
 from pandas.core.dtypes.missing import array_equivalent
 
 from pandas.core import missing
@@ -447,3 +448,24 @@ def value_counts(self, dropna: bool = True):
         index_arr = self._from_backing_data(np.asarray(result.index._data))
         index = Index(index_arr, name=result.index.name)
         return Series(result._values, index=index, name=result.name)
+
+    # ------------------------------------------------------------------------
+    # numpy-like methods
+
+    @classmethod
+    def empty(
+        cls: Type[NDArrayBackedExtensionArrayT], shape: Shape, dtype: ExtensionDtype
+    ) -> NDArrayBackedExtensionArrayT:
+        """
+        Analogue of np.empty(shape, dtype=dtype); values are uninitialized.
+
+        Parameters
+        ----------
+        shape : tuple[int]
+        dtype : ExtensionDtype
+        """
+        # Naive base approach: build a zero-length instance solely to learn
+        #  which numpy dtype backs arrays of the requested ExtensionDtype.
+        template = cls._from_sequence([], dtype=dtype)
+        storage = np.empty(shape, dtype=template._ndarray.dtype)
+        return template._from_backing_data(storage)
diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
@@ -34,6 +34,7 @@
     NpDtype,
     Ordered,
     Scalar,
+    Shape,
 )
 from pandas.compat.numpy import function as nv
 from pandas.util._decorators import (
@@ -1527,6 +1528,30 @@ def value_counts(self, dropna: bool = True):
 
         return Series(count, index=CategoricalIndex(ix), dtype="int64")
 
+    # error: Argument 2 of "empty" is incompatible with supertype
+    # "NDArrayBackedExtensionArray"; supertype defines the argument type as
+    # "ExtensionDtype"
+    @classmethod
+    def empty(  # type: ignore[override]
+        cls: Type[Categorical], shape: Shape, dtype: CategoricalDtype
+    ) -> Categorical:
+        """
+        Analogue of np.empty(shape, dtype=dtype), but with zero-filled codes.
+
+        Parameters
+        ----------
+        shape : tuple[int]
+        dtype : CategoricalDtype
+        """
+        template = cls._from_sequence([], dtype=dtype)
+
+        # np.empty would leave arbitrary values in the codes, some of which
+        #  may not be supported by this dtype, in which case repr(result)
+        #  could segfault; np.zeros is used instead to avoid that.
+        codes = np.zeros(shape, dtype=template._ndarray.dtype)
+
+        return template._from_backing_data(codes)
+
     def _internal_get_values(self):
         """
         Return the values.

diff --git a/pandas/tests/arrays/test_ndarray_backed.py b/pandas/tests/arrays/test_ndarray_backed.py
@@ -0,0 +1,89 @@
+"""
+Tests for subclasses of NDArrayBackedExtensionArray
+"""
+import numpy as np
+import pytest
+
+from pandas import (
+    CategoricalIndex,
+    date_range,
+)
+from pandas.core.arrays import (
+    Categorical,
+    DatetimeArray,
+    PandasArray,
+    PeriodArray,
+    TimedeltaArray,
+)
+
+
+@pytest.fixture(
+    params=(Categorical, DatetimeArray, TimedeltaArray, PeriodArray, PandasArray)
+)
+def ea_subclass(request):
+    """
+    Parametrized fixture providing each NDArrayBackedExtensionArray subclass.
+    """
+    return request.param
+
+
+class TestEmpty:
+    """Tests for ``empty`` on subclasses of NDArrayBackedExtensionArray."""
+
+    def test_empty_categorical(self):
+        dtype = CategoricalIndex(["a", "b", "c"], ordered=True).dtype
+
+        # case with int8 codes
+        shape = (4,)
+        result = Categorical.empty(shape, dtype=dtype)
+        assert isinstance(result, Categorical)
+        assert result.dtype == dtype
+        assert result.shape == shape
+        assert result._ndarray.dtype == np.int8
+
+        # case where repr would segfault if we didn't override base implementation
+        result = Categorical.empty((4096,), dtype=dtype)
+        assert isinstance(result, Categorical)
+        assert result.shape == (4096,)
+        assert result._ndarray.dtype == np.int8
+        repr(result)
+
+        # case with int16 codes
+        dtype = CategoricalIndex(list(range(512)) * 4, ordered=False).dtype
+        result = Categorical.empty(shape, dtype=dtype)
+        assert isinstance(result, Categorical)
+        assert result.shape == shape
+        assert result._ndarray.dtype == np.int16
+
+    def test_empty_dt64tz(self):
+        dtype = date_range("2016-01-01", periods=2, tz="Asia/Tokyo").dtype
+
+        shape = (0,)
+        result = DatetimeArray.empty(shape, dtype=dtype)
+        assert result.dtype == dtype
+        assert isinstance(result, DatetimeArray)
+        assert result.shape == shape
+
+    def test_empty_dt64(self):
+        # 2D shape, with the dtype given as a string rather than a dtype object
+        shape = (3, 9)
+        result = DatetimeArray.empty(shape, dtype="datetime64[ns]")
+        assert isinstance(result, DatetimeArray)
+        assert result.dtype == "datetime64[ns]"
+        assert result.shape == shape
+
+    def test_empty_td64(self):
+        shape = (3, 9)
+        result = TimedeltaArray.empty(shape, dtype="m8[ns]")
+        assert isinstance(result, TimedeltaArray)
+        assert result.dtype == "m8[ns]"
+        assert result.shape == shape
+
+    def test_empty_pandas_array(self):
+        dtype = PandasArray(np.array([1, 2])).dtype
+
+        shape = (3, 9)
+        result = PandasArray.empty(shape, dtype=dtype)
+        assert isinstance(result, PandasArray)
+        assert result.dtype == dtype
+        assert result.shape == shape