Skip to content

Commit e365f01

Browse files
Merge remote-tracking branch 'upstream/master' into arrow-string-array-dtype
2 parents 3399f08 + d662e97 commit e365f01

File tree

296 files changed

+8093
-4120
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

296 files changed

+8093
-4120
lines changed

.github/workflows/ci.yml

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,9 @@ jobs:
2222

2323
steps:
2424
- name: Checkout
25-
uses: actions/checkout@v1
25+
uses: actions/checkout@v2
26+
with:
27+
fetch-depth: 0
2628

2729
- name: Looking for unwanted patterns
2830
run: ci/code_checks.sh patterns
@@ -94,7 +96,9 @@ jobs:
9496
steps:
9597

9698
- name: Checkout
97-
uses: actions/checkout@v1
99+
uses: actions/checkout@v2
100+
with:
101+
fetch-depth: 0
98102

99103
- name: Set up pandas
100104
uses: ./.github/actions/setup
@@ -147,7 +151,9 @@ jobs:
147151
steps:
148152

149153
- name: Checkout
150-
uses: actions/checkout@v1
154+
uses: actions/checkout@v2
155+
with:
156+
fetch-depth: 0
151157

152158
- name: Set up pandas
153159
uses: ./.github/actions/setup

.github/workflows/database.yml

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -56,10 +56,12 @@ jobs:
5656

5757
steps:
5858
- name: Checkout
59-
uses: actions/checkout@v1
59+
uses: actions/checkout@v2
60+
with:
61+
fetch-depth: 0
6062

6163
- name: Cache conda
62-
uses: actions/cache@v1
64+
uses: actions/cache@v2
6365
env:
6466
CACHE_NUMBER: 0
6567
with:
@@ -70,7 +72,7 @@ jobs:
7072
- uses: conda-incubator/setup-miniconda@v2
7173
with:
7274
activate-environment: pandas-dev
73-
channel-priority: strict
75+
channel-priority: flexible
7476
environment-file: ${{ matrix.ENV_FILE }}
7577
use-only-tar-bz2: true
7678

.github/workflows/posix.yml

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,10 +44,12 @@ jobs:
4444

4545
steps:
4646
- name: Checkout
47-
uses: actions/checkout@v1
47+
uses: actions/checkout@v2
48+
with:
49+
fetch-depth: 0
4850

4951
- name: Cache conda
50-
uses: actions/cache@v1
52+
uses: actions/cache@v2
5153
env:
5254
CACHE_NUMBER: 0
5355
with:

.pre-commit-config.yaml

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -19,13 +19,14 @@ repos:
1919
types_or: [python, rst, markdown]
2020
files: ^(pandas|doc)/
2121
- repo: https://github.com/pre-commit/pre-commit-hooks
22-
rev: v3.4.0
22+
rev: v4.0.1
2323
hooks:
24+
- id: debug-statements
2425
- id: end-of-file-fixer
2526
exclude: \.txt$
2627
- id: trailing-whitespace
2728
- repo: https://github.com/cpplint/cpplint
28-
rev: f7061b1 # the latest tag does not have the hook
29+
rev: 1.5.5
2930
hooks:
3031
- id: cpplint
3132
# We don't lint all C files because we don't want to lint any that are built
@@ -35,7 +36,7 @@ repos:
3536
exclude: ^pandas/_libs/src/(klib|headers)/
3637
args: [--quiet, '--extensions=c,h', '--headers=h', --recursive, '--filter=-readability/casting,-runtime/int,-build/include_subdir']
3738
- repo: https://gitlab.com/pycqa/flake8
38-
rev: 3.9.1
39+
rev: 3.9.2
3940
hooks:
4041
- id: flake8
4142
additional_dependencies:
@@ -56,7 +57,7 @@ repos:
5657
hooks:
5758
- id: isort
5859
- repo: https://github.com/asottile/pyupgrade
59-
rev: v2.12.0
60+
rev: v2.18.3
6061
hooks:
6162
- id: pyupgrade
6263
args: [--py37-plus]
@@ -71,11 +72,11 @@ repos:
7172
types: [text] # overwrite types: [rst]
7273
types_or: [python, rst]
7374
- repo: https://github.com/asottile/yesqa
74-
rev: v1.2.2
75+
rev: v1.2.3
7576
hooks:
7677
- id: yesqa
7778
additional_dependencies:
78-
- flake8==3.9.1
79+
- flake8==3.9.2
7980
- flake8-comprehensions==3.1.0
8081
- flake8-bugbear==21.3.2
8182
- pandas-dev-flaker==0.2.0

asv_bench/benchmarks/io/style.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,19 +20,19 @@ def setup(self, cols, rows):
2020

2121
def time_apply_render(self, cols, rows):
2222
self._style_apply()
23-
self.st._render_html()
23+
self.st._render_html(True, True)
2424

2525
def peakmem_apply_render(self, cols, rows):
2626
self._style_apply()
27-
self.st._render_html()
27+
self.st._render_html(True, True)
2828

2929
def time_classes_render(self, cols, rows):
3030
self._style_classes()
31-
self.st._render_html()
31+
self.st._render_html(True, True)
3232

3333
def peakmem_classes_render(self, cols, rows):
3434
self._style_classes()
35-
self.st._render_html()
35+
self.st._render_html(True, True)
3636

3737
def time_format_render(self, cols, rows):
3838
self._style_format()

asv_bench/benchmarks/strings.py

Lines changed: 48 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,19 @@
1111
from .pandas_vb_common import tm
1212

1313

14+
class Dtypes:
15+
params = ["str", "string", "arrow_string"]
16+
param_names = ["dtype"]
17+
18+
def setup(self, dtype):
19+
from pandas.core.arrays.string_arrow import ArrowStringDtype # noqa: F401
20+
21+
try:
22+
self.s = Series(tm.makeStringIndex(10 ** 5), dtype=dtype)
23+
except ImportError:
24+
raise NotImplementedError
25+
26+
1427
class Construction:
1528

1629
params = ["str", "string"]
@@ -49,16 +62,7 @@ def peakmem_cat_frame_construction(self, dtype):
4962
DataFrame(self.frame_cat_arr, dtype=dtype)
5063

5164

52-
class Methods:
53-
params = ["str", "string[python]", "string[pyarrow]"]
54-
param_names = ["dtype"]
55-
56-
def setup(self, dtype):
57-
try:
58-
self.s = Series(tm.makeStringIndex(10 ** 5), dtype=dtype)
59-
except ImportError:
60-
raise NotImplementedError
61-
65+
class Methods(Dtypes):
6266
def time_center(self, dtype):
6367
self.s.str.center(100)
6468

@@ -81,6 +85,9 @@ def time_find(self, dtype):
8185
def time_rfind(self, dtype):
8286
self.s.str.rfind("[A-Z]+")
8387

88+
def time_fullmatch(self, dtype):
89+
self.s.str.fullmatch("A")
90+
8491
def time_get(self, dtype):
8592
self.s.str.get(0)
8693

@@ -209,31 +216,26 @@ def time_cat(self, other_cols, sep, na_rep, na_frac):
209216
self.s.str.cat(others=self.others, sep=sep, na_rep=na_rep)
210217

211218

212-
class Contains:
219+
class Contains(Dtypes):
213220

214-
params = (["str", "string[python]", "string[pyarrow]"], [True, False])
221+
params = (Dtypes.params, [True, False])
215222
param_names = ["dtype", "regex"]
216223

217224
def setup(self, dtype, regex):
218-
try:
219-
self.s = Series(tm.makeStringIndex(10 ** 5), dtype=dtype)
220-
except ImportError:
221-
raise NotImplementedError
225+
super().setup(dtype)
222226

223227
def time_contains(self, dtype, regex):
224228
self.s.str.contains("A", regex=regex)
225229

226230

227-
class Split:
231+
class Split(Dtypes):
228232

229-
params = (["str", "string[python]", "string[pyarrow]"], [True, False])
233+
params = (Dtypes.params, [True, False])
230234
param_names = ["dtype", "expand"]
231235

232236
def setup(self, dtype, expand):
233-
try:
234-
self.s = Series(tm.makeStringIndex(10 ** 5), dtype=dtype).str.join("--")
235-
except ImportError:
236-
raise NotImplementedError
237+
super().setup(dtype)
238+
self.s = self.s.str.join("--")
237239

238240
def time_split(self, dtype, expand):
239241
self.s.str.split("--", expand=expand)
@@ -242,11 +244,25 @@ def time_rsplit(self, dtype, expand):
242244
self.s.str.rsplit("--", expand=expand)
243245

244246

245-
class Dummies:
246-
def setup(self):
247-
self.s = Series(tm.makeStringIndex(10 ** 5)).str.join("|")
247+
class Extract(Dtypes):
248+
249+
params = (Dtypes.params, [True, False])
250+
param_names = ["dtype", "expand"]
248251

249-
def time_get_dummies(self):
252+
def setup(self, dtype, expand):
253+
super().setup(dtype)
254+
255+
def time_extract_single_group(self, dtype, expand):
256+
with warnings.catch_warnings(record=True):
257+
self.s.str.extract("(\\w*)A", expand=expand)
258+
259+
260+
class Dummies(Dtypes):
261+
def setup(self, dtype):
262+
super().setup(dtype)
263+
self.s = self.s.str.join("|")
264+
265+
def time_get_dummies(self, dtype):
250266
self.s.str.get_dummies("|")
251267

252268

@@ -265,3 +281,9 @@ def setup(self):
265281
def time_vector_slice(self):
266282
# GH 2602
267283
self.s.str[:5]
284+
285+
286+
class Iter(Dtypes):
287+
def time_iter(self, dtype):
288+
for i in self.s:
289+
pass

ci/deps/actions-37-db-min.yaml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ dependencies:
66

77
# tools
88
- cython>=0.29.21
9-
- pytest>=5.0.1
9+
- pytest>=6.0
1010
- pytest-cov
1111
- pytest-xdist>=1.21
1212
- hypothesis>=3.58.0
@@ -31,7 +31,8 @@ dependencies:
3131
- openpyxl
3232
- pandas-gbq
3333
- google-cloud-bigquery>=1.27.2 # GH 36436
34-
- pyarrow=0.17 # GH 38803
34+
- protobuf>=3.12.4
35+
- pyarrow=0.17.1 # GH 38803
3536
- pytables>=3.5.1
3637
- scipy
3738
- xarray=0.12.3

ci/deps/actions-37-db.yaml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ dependencies:
66

77
# tools
88
- cython>=0.29.21
9-
- pytest>=5.0.1
9+
- pytest>=6.0
1010
- pytest-xdist>=1.21
1111
- hypothesis>=3.58.0
1212
- pytest-cov>=2.10.1 # this is only needed in the coverage build, ref: GH 35737
@@ -15,7 +15,7 @@ dependencies:
1515
- beautifulsoup4
1616
- botocore>=1.11
1717
- dask
18-
- fastparquet>=0.4.0, <=0.5.0
18+
- fastparquet>=0.4.0
1919
- fsspec>=0.7.4
2020
- gcsfs>=0.6.0
2121
- geopandas
@@ -25,13 +25,13 @@ dependencies:
2525
- flask
2626
- nomkl
2727
- numexpr
28-
- numpy=1.16.*
28+
- numpy=1.17.*
2929
- odfpy
3030
- openpyxl
3131
- pandas-gbq
3232
- google-cloud-bigquery>=1.27.2 # GH 36436
3333
- psycopg2
34-
- pyarrow>=0.15.0
34+
- pyarrow>=0.17.0
3535
- pymysql
3636
- pytables
3737
- python-snappy

ci/deps/actions-37-locale_slow.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ dependencies:
77

88
# tools
99
- cython>=0.29.21
10-
- pytest>=5.0.1
10+
- pytest>=6.0
1111
- pytest-cov
1212
- pytest-xdist>=1.21
1313
- hypothesis>=3.58.0
@@ -17,13 +17,13 @@ dependencies:
1717
- bottleneck=1.2.*
1818
- lxml
1919
- matplotlib=3.0.0
20-
- numpy=1.16.*
20+
- numpy=1.17.*
2121
- openpyxl=3.0.0
2222
- python-dateutil
2323
- python-blosc
2424
- pytz=2017.3
2525
- scipy
26-
- sqlalchemy=1.2.8
26+
- sqlalchemy=1.3.0
2727
- xlrd=1.2.0
2828
- xlsxwriter=1.0.2
2929
- xlwt=1.3.0

ci/deps/actions-37-minimum_versions.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,13 +17,13 @@ dependencies:
1717
- bottleneck=1.2.1
1818
- jinja2=2.10
1919
- numba=0.46.0
20-
- numexpr=2.6.8
20+
- numexpr=2.7.0
2121
- numpy=1.17.3
2222
- openpyxl=3.0.0
2323
- pytables=3.5.1
2424
- python-dateutil=2.7.3
2525
- pytz=2017.3
26-
- pyarrow=0.15
26+
- pyarrow=0.17.0
2727
- scipy=1.2
2828
- xlrd=1.2.0
2929
- xlsxwriter=1.0.2

ci/deps/actions-37-slow.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ dependencies:
77

88
# tools
99
- cython>=0.29.21
10-
- pytest>=5.0.1
10+
- pytest>=6.0
1111
- pytest-cov
1212
- pytest-xdist>=1.21
1313
- hypothesis>=3.58.0

ci/deps/actions-37.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ dependencies:
77

88
# tools
99
- cython>=0.29.21
10-
- pytest>=5.0.1
10+
- pytest>=6.0
1111
- pytest-cov
1212
- pytest-xdist>=1.21
1313
- hypothesis>=3.58.0
@@ -18,7 +18,7 @@ dependencies:
1818
- numpy=1.19
1919
- python-dateutil
2020
- nomkl
21-
- pyarrow=0.15.1
21+
- pyarrow
2222
- pytz
2323
- s3fs>=0.4.0
2424
- moto>=1.3.14

0 commit comments

Comments
 (0)