pandas-dev
diff --git a/‎.travis.yml
Lines changed: 58 additions & 57 deletions b/‎.travis.yml
Lines changed: 58 additions & 57 deletions
diff --git a/‎asv_bench/benchmarks/algorithms.py
Lines changed: 5 additions & 0 deletions b/‎asv_bench/benchmarks/algorithms.py
Lines changed: 5 additions & 0 deletions
diff --git a/‎asv_bench/benchmarks/gil.py
Lines changed: 35 additions & 0 deletions b/‎asv_bench/benchmarks/gil.py
Lines changed: 35 additions & 0 deletions
diff --git a/‎doc/source/contributing.rst
Lines changed: 78 additions & 15 deletions b/‎doc/source/contributing.rst
Lines changed: 78 additions & 15 deletions
diff --git a/‎doc/source/io.rst
Lines changed: 1 addition & 1 deletion b/‎doc/source/io.rst
Lines changed: 1 addition & 1 deletion
diff --git a/‎doc/source/whatsnew/v0.19.2.txt
Lines changed: 3 additions & 1 deletion b/‎doc/source/whatsnew/v0.19.2.txt
Lines changed: 3 additions & 1 deletion
@@ -66,19 +66,6 @@ matrix:
         apt:
           packages:
           - python-gtk2
-    - python: 3.4
-      env:
-        - PYTHON_VERSION=3.4
-        - JOB_NAME: "34_nslow"
-        - NOSE_ARGS="not slow and not disabled"
-        - FULL_DEPS=true
-        - CLIPBOARD=xsel
-        - CACHE_NAME="34_nslow"
-        - USE_CACHE=true
-      addons:
-        apt:
-          packages:
-          - xsel
     - python: 3.5
       env:
         - PYTHON_VERSION=3.5
@@ -93,6 +80,33 @@ matrix:
         apt:
           packages:
           - xsel
+    - python: 3.6-dev
+      env:
+        - PYTHON_VERSION=3.6
+        - JOB_NAME: "36_dev"
+        - JOB_TAG=_DEV
+        - NOSE_ARGS="not slow and not network and not disabled"
+        - PANDAS_TESTING_MODE="deprecate"
+      addons:
+        apt:
+          packages:
+          - libatlas-base-dev
+          - gfortran
+#    In allow_failures
+    - python: 2.7
+      env:
+        - PYTHON_VERSION=2.7
+        - JOB_NAME: "27_nslow_nnet_COMPAT"
+        - NOSE_ARGS="not slow and not network and not disabled"
+        - LOCALE_OVERRIDE="it_IT.UTF-8"
+        - INSTALL_TEST=true
+        - JOB_TAG=_COMPAT
+        - CACHE_NAME="27_nslow_nnet_COMPAT"
+        - USE_CACHE=true
+      addons:
+        apt:
+          packages:
+          - language-pack-it
 #    In allow_failures
     - python: 2.7
       env:
@@ -103,45 +117,46 @@ matrix:
         - FULL_DEPS=true
         - CACHE_NAME="27_slow"
         - USE_CACHE=true
+#    In allow_failures
+    - python: 2.7
+      env:
+        - PYTHON_VERSION=2.7
+        - JOB_NAME: "27_build_test_conda"
+        - JOB_TAG=_BUILD_TEST
+        - NOSE_ARGS="not slow and not disabled"
+        - FULL_DEPS=true
+        - BUILD_TEST=true
+        - CACHE_NAME="27_build_test_conda"
+        - USE_CACHE=true
 #    In allow_failures
     - python: 3.4
       env:
         - PYTHON_VERSION=3.4
-        - JOB_NAME: "34_slow"
-        - JOB_TAG=_SLOW
-        - NOSE_ARGS="slow and not network and not disabled"
+        - JOB_NAME: "34_nslow"
+        - NOSE_ARGS="not slow and not disabled"
         - FULL_DEPS=true
         - CLIPBOARD=xsel
-        - CACHE_NAME="34_slow"
+        - CACHE_NAME="34_nslow"
         - USE_CACHE=true
       addons:
         apt:
           packages:
           - xsel
 #    In allow_failures
-    - python: 2.7
+    - python: 3.4
       env:
-        - PYTHON_VERSION=2.7
-        - JOB_NAME: "27_build_test_conda"
-        - JOB_TAG=_BUILD_TEST
-        - NOSE_ARGS="not slow and not disabled"
+        - PYTHON_VERSION=3.4
+        - JOB_NAME: "34_slow"
+        - JOB_TAG=_SLOW
+        - NOSE_ARGS="slow and not network and not disabled"
         - FULL_DEPS=true
-        - BUILD_TEST=true
-        - CACHE_NAME="27_build_test_conda"
+        - CLIPBOARD=xsel
+        - CACHE_NAME="34_slow"
         - USE_CACHE=true
-#    In allow_failures
-    - python: 3.6-dev
-      env:
-        - PYTHON_VERSION=3.6
-        - JOB_NAME: "36_dev"
-        - JOB_TAG=_DEV
-        - NOSE_ARGS="not slow and not network and not disabled"
-        - PANDAS_TESTING_MODE="deprecate"
       addons:
         apt:
           packages:
-          - libatlas-base-dev
-          - gfortran
+          - xsel
 #    In allow_failures
     - python: 3.5
       env:
@@ -157,21 +172,6 @@ matrix:
           packages:
           - libatlas-base-dev
           - gfortran
-#    In allow_failures
-    - python: 2.7
-      env:
-        - PYTHON_VERSION=2.7
-        - JOB_NAME: "27_nslow_nnet_COMPAT"
-        - NOSE_ARGS="not slow and not network and not disabled"
-        - LOCALE_OVERRIDE="it_IT.UTF-8"
-        - INSTALL_TEST=true
-        - JOB_TAG=_COMPAT
-        - CACHE_NAME="27_nslow_nnet_COMPAT"
-        - USE_CACHE=true
-      addons:
-        apt:
-          packages:
-          - language-pack-it
 #    In allow_failures
     - python: 3.5
       env:
@@ -226,18 +226,19 @@ matrix:
         - BUILD_TEST=true
         - CACHE_NAME="27_build_test_conda"
         - USE_CACHE=true
-      - python: 3.6-dev
+      - python: 3.4
         env:
-        - PYTHON_VERSION=3.6
-        - JOB_NAME: "36_dev"
-        - JOB_TAG=_DEV
-        - NOSE_ARGS="not slow and not network and not disabled"
-        - PANDAS_TESTING_MODE="deprecate"
+          - PYTHON_VERSION=3.4
+          - JOB_NAME: "34_nslow"
+          - NOSE_ARGS="not slow and not disabled"
+          - FULL_DEPS=true
+          - CLIPBOARD=xsel
+          - CACHE_NAME="34_nslow"
+          - USE_CACHE=true
         addons:
           apt:
             packages:
-            - libatlas-base-dev
-            - gfortran
+            - xsel
       - python: 3.5
         env:
           - PYTHON_VERSION=3.5
 
@@ -8,6 +8,7 @@ class Algorithms(object):
 
     def setup(self):
         N = 100000
+        np.random.seed(1234)
 
         self.int_unique = pd.Int64Index(np.arange(N * 5))
         # cache is_unique
@@ -23,11 +24,15 @@ def setup(self):
         self.arrpos = np.arange(1000000)
         self.arrneg = np.arange(-1000000, 0)
         self.arrmixed = np.array([1, -1]).repeat(500000)
+        self.strings = tm.makeStringIndex(100000)
 
         # match
         self.uniques = tm.makeStringIndex(1000).values
         self.all = self.uniques.repeat(10)
 
+    def time_factorize_string(self):
+        self.strings.factorize()
+
     def time_factorize_int(self):
         self.int.factorize()
 
 
@@ -379,3 +379,38 @@ def pg_read_csv_datetime(self):
 
     def time_read_csv_datetime(self):
         self.pg_read_csv_datetime()
+
+
+class nogil_factorize(object):
+    number = 1
+    repeat = 5
+
+    def setup(self):
+        if (not have_real_test_parallel):
+            raise NotImplementedError
+
+        np.random.seed(1234)
+        self.strings = tm.makeStringIndex(100000)
+
+    def factorize_strings(self):
+        pd.factorize(self.strings)
+
+    @test_parallel(num_threads=4)
+    def _pg_factorize_strings_4(self):
+        self.factorize_strings()
+
+    def time_factorize_strings_4(self):
+        for i in range(2):
+            self._pg_factorize_strings_4()
+
+    @test_parallel(num_threads=2)
+    def _pg_factorize_strings_2(self):
+        self.factorize_strings()
+
+    def time_factorize_strings_2(self):
+        for i in range(4):
+            self._pg_factorize_strings_2()
+
+    def time_factorize_strings(self):
+        for i in range(8):
+            self.factorize_strings()
@@ -113,11 +113,12 @@ want to clone your fork to your machine::
 This creates the directory `pandas-yourname` and connects your repository to
 the upstream (main project) *pandas* repository.
 
-The testing suite will run automatically on Travis-CI once your pull request is
-submitted.  However, if you wish to run the test suite on a branch prior to
-submitting the pull request, then Travis-CI needs to be hooked up to your
-GitHub repository.  Instructions for doing so are `here
-<http://about.travis-ci.org/docs/user/getting-started/>`__.
+The testing suite will run automatically on Travis-CI and AppVeyor once your
+pull request is submitted.  However, if you wish to run the test suite on a
+branch prior to submitting the pull request, then Travis-CI and/or AppVeyor
+need to be hooked up to your GitHub repository.  Instructions for doing so
+are `here <http://about.travis-ci.org/docs/user/getting-started/>`__ for
+Travis-CI and `here <https://www.appveyor.com/docs/>`__ for AppVeyor.
 
 Creating a branch
 -----------------
@@ -142,7 +143,7 @@ To update this branch, you need to retrieve the changes from the master branch::
     git fetch upstream
     git rebase upstream/master
 
-This will replay your commits on top of the lastest pandas git master.  If this
+This will replay your commits on top of the latest pandas git master.  If this
 leads to merge conflicts, you must resolve these before submitting your pull
 request.  If you have uncommitted changes, you will need to ``stash`` them prior
 to updating.  This will effectively store your changes and they can be reapplied
@@ -396,7 +397,7 @@ evocations, sphinx will try to only build the pages that have been modified.
 If you want to do a full clean build, do::
 
     python make.py clean
-    python make.py build
+    python make.py html
 
 Starting with *pandas* 0.13.1 you can tell ``make.py`` to compile only a single section
 of the docs, greatly reducing the turn-around time for checking your changes.
@@ -442,18 +443,80 @@ Contributing to the code base
 Code standards
 --------------
 
+Writing good code is not just about what you write. It is also about *how* you
+write it. During testing on Travis-CI, several tools will be run to check your
+code for stylistic errors. Generating any warnings will cause the test to fail.
+Thus, good style is a requirement for submitting code to *pandas*.
+
+In addition, because a lot of people use our library, it is important that we
+do not make sudden changes that could break a lot of user code. That is, we
+need the code to be as *backwards compatible* as possible to avoid mass
+breakages.
+
+Additional standards are outlined on the `code style wiki
+page <https://github.com/pandas-dev/pandas/wiki/Code-Style-and-Conventions>`_.
+
+C (cpplint)
+~~~~~~~~~~~
+
+*pandas* uses the `Google <https://google.github.io/styleguide/cppguide.html>`_
+standard. Google provides an open source style checker called ``cpplint``, but we
+use a fork of it that can be found `here <https://github.com/cpplint/cpplint>`_.
+Here are *some* of the more common ``cpplint`` issues:
+
+  - we restrict line-length to 80 characters to promote readability
+  - every header file must include a header guard to avoid name collisions if re-included
+
+Travis-CI will run the `cpplint <https://pypi.python.org/pypi/cpplint>`_ tool
+and report any stylistic errors in your code. Therefore, it is helpful to run
+the check yourself before submitting code::
+
+   cpplint --extensions=c,h --headers=h --filter=-readability/casting,-runtime/int,-build/include_subdir modified-c-file
+
+You can also run this command on an entire directory if necessary::
+
+   cpplint --extensions=c,h --headers=h --filter=-readability/casting,-runtime/int,-build/include_subdir --recursive modified-c-directory
+
+To make your commits compliant with this standard, you can install the
+`ClangFormat <http://clang.llvm.org/docs/ClangFormat.html>`_ tool, which can be
+downloaded `here <http://llvm.org/builds/>`_. To configure, in your home directory,
+run the following command::
+
+    clang-format -style=google -dump-config  > .clang-format
+
+Then modify the file to ensure that any indentation width parameters are at least four.
+Once configured, you can run the tool as follows::
+
+    clang-format modified-c-file
+
+This will output what your file will look like if the changes are made, and to apply
+them, just run the following command::
+
+    clang-format -i modified-c-file
+
+To run the tool on an entire directory, you can run the following analogous commands::
+
+    clang-format modified-c-directory/*.c modified-c-directory/*.h
+    clang-format -i modified-c-directory/*.c modified-c-directory/*.h
+
+Do note that this tool is best-effort, meaning that it will try to correct as
+many errors as possible, but it may not correct *all* of them. Thus, it is
+recommended that you run ``cpplint`` to double check and make any other style
+fixes manually.
+
+Python (PEP8)
+~~~~~~~~~~~~~
+
 *pandas* uses the `PEP8 <http://www.python.org/dev/peps/pep-0008/>`_ standard.
 There are several tools to ensure you abide by this standard. Here are *some* of
 the more common ``PEP8`` issues:
 
-  - we restrict line-length to 80 characters to promote readability
+  - we restrict line-length to 79 characters to promote readability
   - passing arguments should have spaces after commas, e.g. ``foo(arg1, arg2, kw1='bar')``
 
-The Travis-CI will run `flake8 <http://pypi.python.org/pypi/flake8>`_ tool and report
-any stylistic errors in your code. Generating any warnings will cause the build to fail;
-thus these are part of the requirements for submitting code to *pandas*.
-
-It is helpful before submitting code to run this yourself on the diff::
+Travis-CI will run the `flake8 <http://pypi.python.org/pypi/flake8>`_ tool
+and report any stylistic errors in your code. Therefore, it is helpful to run
+the check yourself on the diff before submitting code::
 
    git diff master | flake8 --diff
 
@@ -466,8 +529,8 @@ and make these changes with::
 
     pep8radius master --diff --in-place
 
-Additional standards are outlined on the `code style wiki
-page <https://github.com/pandas-dev/pandas/wiki/Code-Style-and-Conventions>`_.
+Backwards Compatibility
+~~~~~~~~~~~~~~~~~~~~~~~
 
 Please try to maintain backward compatibility. *pandas* has lots of users with lots of
 existing code, so don't break it if at all possible.  If you think breakage is required,
 
@@ -4004,7 +4004,7 @@ and data values from the values and assembles them into a ``data.frame``:
    name_paths = paste(listing$group[name_nodes], listing$name[name_nodes], sep = "/")
    columns = list()
    for (idx in seq(data_paths)) {
-     # NOTE: matrices returned by h5read have to be transposed to to obtain
+     # NOTE: matrices returned by h5read have to be transposed to obtain
      # required Fortran order!
      data <- data.frame(t(h5read(h5File, data_paths[idx])))
      names <- t(h5read(h5File, name_paths[idx]))
 
@@ -78,7 +78,7 @@ Bug Fixes
 - Bug in clipboard functions on linux with python2 with unicode and separators (:issue:`13747`)
 - Bug in clipboard functions on Windows 10 and python 3 (:issue:`14362`, :issue:`12807`)
 - Bug in ``.to_clipboard()`` and Excel compat (:issue:`12529`)
-
+- Bug in ``DataFrame.combine_first()`` for integer columns (:issue:`14687`)
 
 - Bug in ``pd.read_csv()`` in which the ``dtype`` parameter was not being respected for empty data (:issue:`14712`)
 - Bug in ``pd.read_csv()`` in which the ``nrows`` parameter was not being respected for large input when using the C engine for parsing (:issue:`7626`)
@@ -88,4 +88,6 @@ Bug Fixes
 
 - Explicit check in ``to_stata`` and ``StataWriter`` for out-of-range values when writing doubles (:issue:`14618`)
 
+- Bug in ``.plot(kind='kde')`` which did not drop missing values to generate the KDE plot, instead generating an empty plot (:issue:`14821`)
+
 - Bug in ``unstack()`` if called with a list of column(s) as an argument, regardless of the dtypes of all columns, they get coerced to ``object`` (:issue:`11847`)