Skip to content

Commit 6282de6

Browse files
committed
Merge remote-tracking branch 'upstream/master'
2 parents 136175c + d1b1720 commit 6282de6

File tree

15 files changed

+491
-148
lines changed

15 files changed

+491
-148
lines changed

.travis.yml

Lines changed: 58 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -66,19 +66,6 @@ matrix:
6666
apt:
6767
packages:
6868
- python-gtk2
69-
- python: 3.4
70-
env:
71-
- PYTHON_VERSION=3.4
72-
- JOB_NAME: "34_nslow"
73-
- NOSE_ARGS="not slow and not disabled"
74-
- FULL_DEPS=true
75-
- CLIPBOARD=xsel
76-
- CACHE_NAME="34_nslow"
77-
- USE_CACHE=true
78-
addons:
79-
apt:
80-
packages:
81-
- xsel
8269
- python: 3.5
8370
env:
8471
- PYTHON_VERSION=3.5
@@ -93,6 +80,33 @@ matrix:
9380
apt:
9481
packages:
9582
- xsel
83+
- python: 3.6-dev
84+
env:
85+
- PYTHON_VERSION=3.6
86+
- JOB_NAME: "36_dev"
87+
- JOB_TAG=_DEV
88+
- NOSE_ARGS="not slow and not network and not disabled"
89+
- PANDAS_TESTING_MODE="deprecate"
90+
addons:
91+
apt:
92+
packages:
93+
- libatlas-base-dev
94+
- gfortran
95+
# In allow_failures
96+
- python: 2.7
97+
env:
98+
- PYTHON_VERSION=2.7
99+
- JOB_NAME: "27_nslow_nnet_COMPAT"
100+
- NOSE_ARGS="not slow and not network and not disabled"
101+
- LOCALE_OVERRIDE="it_IT.UTF-8"
102+
- INSTALL_TEST=true
103+
- JOB_TAG=_COMPAT
104+
- CACHE_NAME="27_nslow_nnet_COMPAT"
105+
- USE_CACHE=true
106+
addons:
107+
apt:
108+
packages:
109+
- language-pack-it
96110
# In allow_failures
97111
- python: 2.7
98112
env:
@@ -103,45 +117,46 @@ matrix:
103117
- FULL_DEPS=true
104118
- CACHE_NAME="27_slow"
105119
- USE_CACHE=true
120+
# In allow_failures
121+
- python: 2.7
122+
env:
123+
- PYTHON_VERSION=2.7
124+
- JOB_NAME: "27_build_test_conda"
125+
- JOB_TAG=_BUILD_TEST
126+
- NOSE_ARGS="not slow and not disabled"
127+
- FULL_DEPS=true
128+
- BUILD_TEST=true
129+
- CACHE_NAME="27_build_test_conda"
130+
- USE_CACHE=true
106131
# In allow_failures
107132
- python: 3.4
108133
env:
109134
- PYTHON_VERSION=3.4
110-
- JOB_NAME: "34_slow"
111-
- JOB_TAG=_SLOW
112-
- NOSE_ARGS="slow and not network and not disabled"
135+
- JOB_NAME: "34_nslow"
136+
- NOSE_ARGS="not slow and not disabled"
113137
- FULL_DEPS=true
114138
- CLIPBOARD=xsel
115-
- CACHE_NAME="34_slow"
139+
- CACHE_NAME="34_nslow"
116140
- USE_CACHE=true
117141
addons:
118142
apt:
119143
packages:
120144
- xsel
121145
# In allow_failures
122-
- python: 2.7
146+
- python: 3.4
123147
env:
124-
- PYTHON_VERSION=2.7
125-
- JOB_NAME: "27_build_test_conda"
126-
- JOB_TAG=_BUILD_TEST
127-
- NOSE_ARGS="not slow and not disabled"
148+
- PYTHON_VERSION=3.4
149+
- JOB_NAME: "34_slow"
150+
- JOB_TAG=_SLOW
151+
- NOSE_ARGS="slow and not network and not disabled"
128152
- FULL_DEPS=true
129-
- BUILD_TEST=true
130-
- CACHE_NAME="27_build_test_conda"
153+
- CLIPBOARD=xsel
154+
- CACHE_NAME="34_slow"
131155
- USE_CACHE=true
132-
# In allow_failures
133-
- python: 3.6-dev
134-
env:
135-
- PYTHON_VERSION=3.6
136-
- JOB_NAME: "36_dev"
137-
- JOB_TAG=_DEV
138-
- NOSE_ARGS="not slow and not network and not disabled"
139-
- PANDAS_TESTING_MODE="deprecate"
140156
addons:
141157
apt:
142158
packages:
143-
- libatlas-base-dev
144-
- gfortran
159+
- xsel
145160
# In allow_failures
146161
- python: 3.5
147162
env:
@@ -157,21 +172,6 @@ matrix:
157172
packages:
158173
- libatlas-base-dev
159174
- gfortran
160-
# In allow_failures
161-
- python: 2.7
162-
env:
163-
- PYTHON_VERSION=2.7
164-
- JOB_NAME: "27_nslow_nnet_COMPAT"
165-
- NOSE_ARGS="not slow and not network and not disabled"
166-
- LOCALE_OVERRIDE="it_IT.UTF-8"
167-
- INSTALL_TEST=true
168-
- JOB_TAG=_COMPAT
169-
- CACHE_NAME="27_nslow_nnet_COMPAT"
170-
- USE_CACHE=true
171-
addons:
172-
apt:
173-
packages:
174-
- language-pack-it
175175
# In allow_failures
176176
- python: 3.5
177177
env:
@@ -226,18 +226,19 @@ matrix:
226226
- BUILD_TEST=true
227227
- CACHE_NAME="27_build_test_conda"
228228
- USE_CACHE=true
229-
- python: 3.6-dev
229+
- python: 3.4
230230
env:
231-
- PYTHON_VERSION=3.6
232-
- JOB_NAME: "36_dev"
233-
- JOB_TAG=_DEV
234-
- NOSE_ARGS="not slow and not network and not disabled"
235-
- PANDAS_TESTING_MODE="deprecate"
231+
- PYTHON_VERSION=3.4
232+
- JOB_NAME: "34_nslow"
233+
- NOSE_ARGS="not slow and not disabled"
234+
- FULL_DEPS=true
235+
- CLIPBOARD=xsel
236+
- CACHE_NAME="34_nslow"
237+
- USE_CACHE=true
236238
addons:
237239
apt:
238240
packages:
239-
- libatlas-base-dev
240-
- gfortran
241+
- xsel
241242
- python: 3.5
242243
env:
243244
- PYTHON_VERSION=3.5

asv_bench/benchmarks/algorithms.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ class Algorithms(object):
88

99
def setup(self):
1010
N = 100000
11+
np.random.seed(1234)
1112

1213
self.int_unique = pd.Int64Index(np.arange(N * 5))
1314
# cache is_unique
@@ -23,11 +24,15 @@ def setup(self):
2324
self.arrpos = np.arange(1000000)
2425
self.arrneg = np.arange(-1000000, 0)
2526
self.arrmixed = np.array([1, -1]).repeat(500000)
27+
self.strings = tm.makeStringIndex(100000)
2628

2729
# match
2830
self.uniques = tm.makeStringIndex(1000).values
2931
self.all = self.uniques.repeat(10)
3032

33+
def time_factorize_string(self):
34+
self.strings.factorize()
35+
3136
def time_factorize_int(self):
3237
self.int.factorize()
3338

asv_bench/benchmarks/gil.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -379,3 +379,38 @@ def pg_read_csv_datetime(self):
379379

380380
def time_read_csv_datetime(self):
381381
self.pg_read_csv_datetime()
382+
383+
384+
class nogil_factorize(object):
385+
number = 1
386+
repeat = 5
387+
388+
def setup(self):
389+
if (not have_real_test_parallel):
390+
raise NotImplementedError
391+
392+
np.random.seed(1234)
393+
self.strings = tm.makeStringIndex(100000)
394+
395+
def factorize_strings(self):
396+
pd.factorize(self.strings)
397+
398+
@test_parallel(num_threads=4)
399+
def _pg_factorize_strings_4(self):
400+
self.factorize_strings()
401+
402+
def time_factorize_strings_4(self):
403+
for i in range(2):
404+
self._pg_factorize_strings_4()
405+
406+
@test_parallel(num_threads=2)
407+
def _pg_factorize_strings_2(self):
408+
self.factorize_strings()
409+
410+
def time_factorize_strings_2(self):
411+
for i in range(4):
412+
self._pg_factorize_strings_2()
413+
414+
def time_factorize_strings(self):
415+
for i in range(8):
416+
self.factorize_strings()

doc/source/contributing.rst

Lines changed: 78 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -113,11 +113,12 @@ want to clone your fork to your machine::
113113
This creates the directory `pandas-yourname` and connects your repository to
114114
the upstream (main project) *pandas* repository.
115115

116-
The testing suite will run automatically on Travis-CI once your pull request is
117-
submitted. However, if you wish to run the test suite on a branch prior to
118-
submitting the pull request, then Travis-CI needs to be hooked up to your
119-
GitHub repository. Instructions for doing so are `here
120-
<http://about.travis-ci.org/docs/user/getting-started/>`__.
116+
The testing suite will run automatically on Travis-CI and AppVeyor once your
117+
pull request is submitted. However, if you wish to run the test suite on a
118+
branch prior to submitting the pull request, then Travis-CI and/or AppVeyor
119+
need to be hooked up to your GitHub repository. Instructions for doing so
120+
are `here <http://about.travis-ci.org/docs/user/getting-started/>`__ for
121+
Travis-CI and `here <https://www.appveyor.com/docs/>`__ for AppVeyor.
121122

122123
Creating a branch
123124
-----------------
@@ -142,7 +143,7 @@ To update this branch, you need to retrieve the changes from the master branch::
142143
git fetch upstream
143144
git rebase upstream/master
144145

145-
This will replay your commits on top of the lastest pandas git master. If this
146+
This will replay your commits on top of the latest pandas git master. If this
146147
leads to merge conflicts, you must resolve these before submitting your pull
147148
request. If you have uncommitted changes, you will need to ``stash`` them prior
148149
to updating. This will effectively store your changes and they can be reapplied
@@ -396,7 +397,7 @@ evocations, sphinx will try to only build the pages that have been modified.
396397
If you want to do a full clean build, do::
397398

398399
python make.py clean
399-
python make.py build
400+
python make.py html
400401

401402
Starting with *pandas* 0.13.1 you can tell ``make.py`` to compile only a single section
402403
of the docs, greatly reducing the turn-around time for checking your changes.
@@ -442,18 +443,80 @@ Contributing to the code base
442443
Code standards
443444
--------------
444445

446+
Writing good code is not just about what you write. It is also about *how* you
447+
write it. During testing on Travis-CI, several tools will be run to check your
448+
code for stylistic errors. Generating any warnings will cause the test to fail.
449+
Thus, good style is a requirement for submitting code to *pandas*.
450+
451+
In addition, because a lot of people use our library, it is important that we
452+
do not make sudden changes to the code that could have the potential to break
453+
a lot of user code as a result; that is, we need it to be as *backwards compatible*
454+
as possible to avoid mass breakages.
455+
456+
Additional standards are outlined on the `code style wiki
457+
page <https://github.com/pandas-dev/pandas/wiki/Code-Style-and-Conventions>`_.
458+
459+
C (cpplint)
460+
~~~~~~~~~~~
461+
462+
*pandas* uses the `Google <https://google.github.io/styleguide/cppguide.html>`_
463+
standard. Google provides an open source style checker called ``cpplint``, but we
464+
use a fork of it that can be found `here <https://github.com/cpplint/cpplint>`_.
465+
Here are *some* of the more common ``cpplint`` issues:
466+
467+
- we restrict line-length to 80 characters to promote readability
468+
- every header file must include a header guard to avoid name collisions if re-included
469+
470+
Travis-CI will run the `cpplint <https://pypi.python.org/pypi/cpplint>`_ tool
471+
and report any stylistic errors in your code. Therefore, it is helpful before
472+
submitting code to run the check yourself::
473+
474+
cpplint --extensions=c,h --headers=h --filter=-readability/casting,-runtime/int,-build/include_subdir modified-c-file
475+
476+
You can also run this command on an entire directory if necessary::
477+
478+
cpplint --extensions=c,h --headers=h --filter=-readability/casting,-runtime/int,-build/include_subdir --recursive modified-c-directory
479+
480+
To make your commits compliant with this standard, you can install the
481+
`ClangFormat <http://clang.llvm.org/docs/ClangFormat.html>`_ tool, which can be
482+
downloaded `here <http://llvm.org/builds/>`_. To configure, in your home directory,
483+
run the following command::
484+
485+
clang-format -style=google -dump-config > .clang-format
486+
487+
Then modify the file to ensure that any indentation width parameters are at least four.
488+
Once configured, you can run the tool as follows::
489+
490+
clang-format modified-c-file
491+
492+
This will output what your file will look like if the changes are made, and to apply
493+
them, just run the following command::
494+
495+
clang-format -i modified-c-file
496+
497+
To run the tool on an entire directory, you can run the following analogous commands::
498+
499+
clang-format modified-c-directory/*.c modified-c-directory/*.h
500+
clang-format -i modified-c-directory/*.c modified-c-directory/*.h
501+
502+
Do note that this tool is best-effort, meaning that it will try to correct as
503+
many errors as possible, but it may not correct *all* of them. Thus, it is
504+
recommended that you run ``cpplint`` to double check and make any other style
505+
fixes manually.
506+
507+
Python (PEP8)
508+
~~~~~~~~~~~~~
509+
445510
*pandas* uses the `PEP8 <http://www.python.org/dev/peps/pep-0008/>`_ standard.
446511
There are several tools to ensure you abide by this standard. Here are *some* of
447512
the more common ``PEP8`` issues:
448513

449-
- we restrict line-length to 80 characters to promote readability
514+
- we restrict line-length to 79 characters to promote readability
450515
- passing arguments should have spaces after commas, e.g. ``foo(arg1, arg2, kw1='bar')``
451516

452-
The Travis-CI will run `flake8 <http://pypi.python.org/pypi/flake8>`_ tool and report
453-
any stylistic errors in your code. Generating any warnings will cause the build to fail;
454-
thus these are part of the requirements for submitting code to *pandas*.
455-
456-
It is helpful before submitting code to run this yourself on the diff::
517+
Travis-CI will run the `flake8 <http://pypi.python.org/pypi/flake8>`_ tool
518+
and report any stylistic errors in your code. Therefore, it is helpful before
519+
submitting code to run the check yourself on the diff::
457520

458521
git diff master | flake8 --diff
459522

@@ -466,8 +529,8 @@ and make these changes with::
466529

467530
pep8radius master --diff --in-place
468531

469-
Additional standards are outlined on the `code style wiki
470-
page <https://github.com/pandas-dev/pandas/wiki/Code-Style-and-Conventions>`_.
532+
Backwards Compatibility
533+
~~~~~~~~~~~~~~~~~~~~~~~
471534

472535
Please try to maintain backward compatibility. *pandas* has lots of users with lots of
473536
existing code, so don't break it if at all possible. If you think breakage is required,

doc/source/io.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4004,7 +4004,7 @@ and data values from the values and assembles them into a ``data.frame``:
40044004
name_paths = paste(listing$group[name_nodes], listing$name[name_nodes], sep = "/")
40054005
columns = list()
40064006
for (idx in seq(data_paths)) {
4007-
# NOTE: matrices returned by h5read have to be transposed to to obtain
4007+
# NOTE: matrices returned by h5read have to be transposed to obtain
40084008
# required Fortran order!
40094009
data <- data.frame(t(h5read(h5File, data_paths[idx])))
40104010
names <- t(h5read(h5File, name_paths[idx]))

doc/source/whatsnew/v0.19.2.txt

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ Bug Fixes
7878
- Bug in clipboard functions on linux with python2 with unicode and separators (:issue:`13747`)
7979
- Bug in clipboard functions on Windows 10 and python 3 (:issue:`14362`, :issue:`12807`)
8080
- Bug in ``.to_clipboard()`` and Excel compat (:issue:`12529`)
81-
81+
- Bug in ``DataFrame.combine_first()`` for integer columns (:issue:`14687`)
8282

8383
- Bug in ``pd.read_csv()`` in which the ``dtype`` parameter was not being respected for empty data (:issue:`14712`)
8484
- Bug in ``pd.read_csv()`` in which the ``nrows`` parameter was not being respected for large input when using the C engine for parsing (:issue:`7626`)
@@ -88,4 +88,6 @@ Bug Fixes
8888

8989
- Explicit check in ``to_stata`` and ``StataWriter`` for out-of-range values when writing doubles (:issue:`14618`)
9090

91+
- Bug in ``.plot(kind='kde')`` which did not drop missing values to generate the KDE Plot, instead generating an empty plot (:issue:`14821`)
92+
9193
- Bug in ``unstack()`` if called with a list of column(s) as an argument, regardless of the dtypes of all columns, they get coerced to ``object`` (:issue:`11847`)

0 commit comments

Comments
 (0)