Skip to content

Commit 109dc86

Browse files
pan-vlados and Dr-Irv authored
GH1074 Add type hint Series[list[str]] for Series.str.split with expand=False (#1075)
* GH1074 Add type hint Series[list[str]] for Series.str.split with expand=False
* Updates:
  - fix Index.str.split method return wrong result;
  - add test for Index.str.split method with expand=False;
  - return changes performed in pull request #1029.
* Update tests/test_indexes.py
  Co-authored-by: Irv Lustig <[email protected]>
* Update tests/test_series.py
  Co-authored-by: Irv Lustig <[email protected]>
* Update tests/test_series.py
  Co-authored-by: Irv Lustig <[email protected]>
* Updates:
  - combine two str.split overloads and keep only _TS and _TS2;
  - fix test_indexes.py test for test_str_split().
* pre-commit fixes
* Add type hints and tests for str.rsplit() for expand=False

---------

Co-authored-by: Irv Lustig <[email protected]>
1 parent 63dfe96 commit 109dc86

File tree

6 files changed

+74
-11
lines changed

6 files changed

+74
-11
lines changed

pandas-stubs/_typing.pyi

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -547,7 +547,8 @@ S1 = TypeVar(
547547
| Period
548548
| Interval
549549
| CategoricalDtype
550-
| BaseOffset,
550+
| BaseOffset
551+
| list[str],
551552
)
552553

553554
S2 = TypeVar(
@@ -566,7 +567,8 @@ S2 = TypeVar(
566567
| Period
567568
| Interval
568569
| CategoricalDtype
569-
| BaseOffset,
570+
| BaseOffset
571+
| list[str],
570572
)
571573

572574
IndexingInt: TypeAlias = (

pandas-stubs/core/indexes/base.pyi

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -261,7 +261,9 @@ class Index(IndexOpsMixin[S1]):
261261
**kwargs,
262262
) -> Self: ...
263263
@property
264-
def str(self) -> StringMethods[Self, MultiIndex, np_ndarray_bool]: ...
264+
def str(
265+
self,
266+
) -> StringMethods[Self, MultiIndex, np_ndarray_bool, Index[list[str]]]: ...
265267
def is_(self, other) -> bool: ...
266268
def __len__(self) -> int: ...
267269
def __array__(self, dtype=...) -> np.ndarray: ...

pandas-stubs/core/series.pyi

Lines changed: 23 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -252,6 +252,26 @@ class Series(IndexOpsMixin[S1], NDFrame):
252252
copy: bool = ...,
253253
) -> Series[Any]: ...
254254
@overload
255+
def __new__(
256+
cls,
257+
data: Sequence[list[str]],
258+
index: Axes | None = ...,
259+
*,
260+
dtype: Dtype = ...,
261+
name: Hashable = ...,
262+
copy: bool = ...,
263+
) -> Series[list[str]]: ...
264+
@overload
265+
def __new__(
266+
cls,
267+
data: Sequence[str],
268+
index: Axes | None = ...,
269+
*,
270+
dtype: Dtype = ...,
271+
name: Hashable = ...,
272+
copy: bool = ...,
273+
) -> Series[str]: ...
274+
@overload
255275
def __new__(
256276
cls,
257277
data: (
@@ -1199,7 +1219,9 @@ class Series(IndexOpsMixin[S1], NDFrame):
11991219
) -> Series[S1]: ...
12001220
def to_period(self, freq: _str | None = ..., copy: _bool = ...) -> DataFrame: ...
12011221
@property
1202-
def str(self) -> StringMethods[Series, DataFrame, Series[bool]]: ...
1222+
def str(
1223+
self,
1224+
) -> StringMethods[Series, DataFrame, Series[bool], Series[list[str]]]: ...
12031225
@property
12041226
def dt(self) -> CombinedDatetimelikeProperties: ...
12051227
@property

pandas-stubs/core/strings.pyi

Lines changed: 14 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -15,6 +15,7 @@ import numpy as np
1515
import pandas as pd
1616
from pandas import (
1717
DataFrame,
18+
Index,
1819
MultiIndex,
1920
Series,
2021
)
@@ -28,10 +29,12 @@ from pandas._typing import (
2829

2930
# The _TS type is what is used for the result of str.split with expand=True
3031
_TS = TypeVar("_TS", DataFrame, MultiIndex)
32+
# The _TS2 type is what is used for the result of str.split with expand=False
33+
_TS2 = TypeVar("_TS2", Series[list[str]], Index[list[str]])
3134
# The _TM type is what is used for the result of str.match
3235
_TM = TypeVar("_TM", Series[bool], np_ndarray_bool)
3336

34-
class StringMethods(NoNewAttributesMixin, Generic[T, _TS, _TM]):
37+
class StringMethods(NoNewAttributesMixin, Generic[T, _TS, _TM, _TS2]):
3538
def __init__(self, data: T) -> None: ...
3639
def __getitem__(self, key: slice | int) -> T: ...
3740
def __iter__(self) -> T: ...
@@ -66,12 +69,19 @@ class StringMethods(NoNewAttributesMixin, Generic[T, _TS, _TM]):
6669
) -> _TS: ...
6770
@overload
6871
def split(
69-
self, pat: str = ..., *, n: int = ..., expand: bool = ..., regex: bool = ...
70-
) -> T: ...
72+
self,
73+
pat: str = ...,
74+
*,
75+
n: int = ...,
76+
expand: Literal[False] = ...,
77+
regex: bool = ...,
78+
) -> _TS2: ...
7179
@overload
7280
def rsplit(self, pat: str = ..., *, n: int = ..., expand: Literal[True]) -> _TS: ...
7381
@overload
74-
def rsplit(self, pat: str = ..., *, n: int = ..., expand: bool = ...) -> T: ...
82+
def rsplit(
83+
self, pat: str = ..., *, n: int = ..., expand: Literal[False] = ...
84+
) -> _TS2: ...
7585
@overload
7686
def partition(self, sep: str = ...) -> pd.DataFrame: ...
7787
@overload

tests/test_indexes.py

Lines changed: 18 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -111,8 +111,25 @@ def test_difference_none() -> None:
111111
def test_str_split() -> None:
112112
# GH 194
113113
ind = pd.Index(["a-b", "c-d"])
114-
check(assert_type(ind.str.split("-"), "pd.Index[str]"), pd.Index)
114+
check(assert_type(ind.str.split("-"), "pd.Index[list[str]]"), pd.Index, list)
115115
check(assert_type(ind.str.split("-", expand=True), pd.MultiIndex), pd.MultiIndex)
116+
check(
117+
assert_type(ind.str.split("-", expand=False), "pd.Index[list[str]]"),
118+
pd.Index,
119+
list,
120+
)
121+
122+
123+
def test_str_rsplit() -> None:
124+
# GH 1074
125+
ind = pd.Index(["a-b", "c-d"])
126+
check(assert_type(ind.str.rsplit("-"), "pd.Index[list[str]]"), pd.Index, list)
127+
check(assert_type(ind.str.rsplit("-", expand=True), pd.MultiIndex), pd.MultiIndex)
128+
check(
129+
assert_type(ind.str.rsplit("-", expand=False), "pd.Index[list[str]]"),
130+
pd.Index,
131+
list,
132+
)
116133

117134

118135
def test_str_match() -> None:

tests/test_series.py

Lines changed: 12 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1548,14 +1548,24 @@ def test_string_accessors():
15481548
check(assert_type(s.str.rindex("p"), pd.Series), pd.Series)
15491549
check(assert_type(s.str.rjust(80), pd.Series), pd.Series)
15501550
check(assert_type(s.str.rpartition("p"), pd.DataFrame), pd.DataFrame)
1551-
check(assert_type(s.str.rsplit("a"), pd.Series), pd.Series)
1551+
check(assert_type(s.str.rsplit("a"), "pd.Series[list[str]]"), pd.Series, list)
15521552
check(assert_type(s.str.rsplit("a", expand=True), pd.DataFrame), pd.DataFrame)
1553+
check(
1554+
assert_type(s.str.rsplit("a", expand=False), "pd.Series[list[str]]"),
1555+
pd.Series,
1556+
list,
1557+
)
15531558
check(assert_type(s.str.rstrip(), pd.Series), pd.Series)
15541559
check(assert_type(s.str.slice(0, 4, 2), pd.Series), pd.Series)
15551560
check(assert_type(s.str.slice_replace(0, 2, "XX"), pd.Series), pd.Series)
1556-
check(assert_type(s.str.split("a"), pd.Series), pd.Series)
1561+
check(assert_type(s.str.split("a"), "pd.Series[list[str]]"), pd.Series, list)
15571562
# GH 194
15581563
check(assert_type(s.str.split("a", expand=True), pd.DataFrame), pd.DataFrame)
1564+
check(
1565+
assert_type(s.str.split("a", expand=False), "pd.Series[list[str]]"),
1566+
pd.Series,
1567+
list,
1568+
)
15591569
check(assert_type(s.str.startswith("a"), "pd.Series[bool]"), pd.Series, np.bool_)
15601570
check(
15611571
assert_type(s.str.startswith(("a", "b")), "pd.Series[bool]"),

0 commit comments

Comments
 (0)