Commit e7b865c (merge)
2 parents: 95c012a + d150f17

418 files changed: +14423 additions, -10622 deletions


.github/CONTRIBUTING.md (1 addition, 1 deletion)

@@ -2,7 +2,7 @@
 
 Whether you are a novice or experienced software developer, all contributions and suggestions are welcome!
 
-Our main contributing guide can be found [in this repo](https://github.com/pandas-dev/pandas/blob/master/doc/source/development/contributing.rst) or [on the website](https://pandas-docs.github.io/pandas-docs-travis/contributing.html). If you do not want to read it in its entirety, we will summarize the main ways in which you can contribute and point to relevant sections of that document for further information.
+Our main contributing guide can be found [in this repo](https://github.com/pandas-dev/pandas/blob/master/doc/source/development/contributing.rst) or [on the website](https://pandas-docs.github.io/pandas-docs-travis/development/contributing.html). If you do not want to read it in its entirety, we will summarize the main ways in which you can contribute and point to relevant sections of that document for further information.
 
 ## Getting Started

.github/FUNDING.yml (1 addition, 0 deletions)

@@ -0,0 +1 @@
+custom: https://pandas.pydata.org/donate.html

.travis.yml (9 additions, 14 deletions)

@@ -48,17 +48,10 @@ matrix:
       env:
         - JOB="3.6, slow" ENV_FILE="ci/deps/travis-36-slow.yaml" PATTERN="slow"
 
-    # In allow_failures
-    - dist: trusty
-      env:
-        - JOB="3.6, doc" ENV_FILE="ci/deps/travis-36-doc.yaml" DOC=true
   allow_failures:
     - dist: trusty
       env:
         - JOB="3.6, slow" ENV_FILE="ci/deps/travis-36-slow.yaml" PATTERN="slow"
-    - dist: trusty
-      env:
-        - JOB="3.6, doc" ENV_FILE="ci/deps/travis-36-doc.yaml" DOC=true
 
 before_install:
   - echo "before_install"
@@ -86,19 +79,21 @@ install:
   - ci/submit_cython_cache.sh
   - echo "install done"
 
+before_script:
+  # display server (for clipboard functionality) needs to be started here,
+  # does not work if done in install:setup_env.sh (GH-26103)
+  - export DISPLAY=":99.0"
+  - echo "sh -e /etc/init.d/xvfb start"
+  - sh -e /etc/init.d/xvfb start
+  - sleep 3
+
 script:
   - echo "script start"
   - source activate pandas-dev
-  - ci/build_docs.sh
   - ci/run_tests.sh
 
 after_script:
   - echo "after_script start"
   - source activate pandas-dev && pushd /tmp && python -c "import pandas; pandas.show_versions();" && popd
-  - if [ -e test-data-single.xml ]; then
-      ci/print_skipped.py test-data-single.xml;
-    fi
-  - if [ -e test-data-multiple.xml ]; then
-      ci/print_skipped.py test-data-multiple.xml;
-    fi
+  - ci/print_skipped.py
   - echo "after_script done"

LICENSES/HAVEN_LICENSE (2 additions, 0 deletions)

@@ -0,0 +1,2 @@
+YEAR: 2013-2016
+COPYRIGHT HOLDER: Hadley Wickham; RStudio; and Evan Miller

LICENSES/HAVEN_MIT (32 additions, 0 deletions)

@@ -0,0 +1,32 @@
+Based on http://opensource.org/licenses/MIT
+
+This is a template. Complete and ship as file LICENSE the following 2
+lines (only)
+
+YEAR:
+COPYRIGHT HOLDER:
+
+and specify as
+
+License: MIT + file LICENSE
+
+Copyright (c) <YEAR>, <COPYRIGHT HOLDER>
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

README.md (1 addition, 1 deletion)

@@ -224,7 +224,7 @@ Most development discussion is taking place on github in this repo. Further, the
 
 All contributions, bug reports, bug fixes, documentation improvements, enhancements and ideas are welcome.
 
-A detailed overview on how to contribute can be found in the **[contributing guide](https://pandas-docs.github.io/pandas-docs-travis/contributing.html)**. There is also an [overview](.github/CONTRIBUTING.md) on GitHub.
+A detailed overview on how to contribute can be found in the **[contributing guide](https://dev.pandas.io/contributing.html)**. There is also an [overview](.github/CONTRIBUTING.md) on GitHub.
 
 If you are simply looking to start working with the pandas codebase, navigate to the [GitHub "issues" tab](https://github.com/pandas-dev/pandas/issues) and start looking through interesting issues. There are a number of issues listed under [Docs](https://github.com/pandas-dev/pandas/issues?labels=Docs&sort=updated&state=open) and [good first issue](https://github.com/pandas-dev/pandas/issues?labels=good+first+issue&sort=updated&state=open) where you could start out.

asv_bench/asv.conf.json (1 addition, 1 deletion)

@@ -107,7 +107,7 @@
     // `asv` will cache wheels of the recent builds in each
     // environment, making them faster to install next time. This is
     // number of builds to keep, per environment.
-    "wheel_cache_size": 8,
+    "build_cache_size": 8,
 
     // The commits after which the regression search in `asv publish`
     // should start looking for regressions. Dictionary whose keys are

asv_bench/benchmarks/frame_methods.py (2 additions, 0 deletions)

@@ -96,6 +96,8 @@ def time_dict_rename_both_axes(self):
 
 
 class Iteration:
+    # mem_itertuples_* benchmarks are slow
+    timeout = 120
 
     def setup(self):
         N = 1000
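The `timeout = 120` attribute added above uses asv's benchmark conventions: a plain class with a `setup` method and `time_*` methods that asv discovers and times, plus class attributes such as `timeout` (in seconds). A minimal runnable sketch of the same shape (the class body here is illustrative, not the pandas benchmark itself):

```python
# Minimal sketch of an asv-style benchmark class: asv discovers classes
# with time_* methods, calls setup() before timing, and honors class
# attributes like `timeout`. Here we just exercise one call manually.
class IterationSketch:
    timeout = 120  # per-benchmark time limit in seconds, as in the diff

    def setup(self):
        # asv calls this before timing each benchmark method
        self.data = list(range(1000))

    def time_sum(self):
        # asv would time repeated calls of this method
        return sum(self.data)


bench = IterationSketch()
bench.setup()
print(bench.time_sum())  # 499500
```

asv never needs these methods to return anything; returning the value here just makes the sketch easy to check by hand.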

asv_bench/benchmarks/groupby.py (1 addition, 6 deletions)

@@ -1,12 +1,11 @@
 from functools import partial
 from itertools import product
 from string import ascii_letters
-import warnings
 
 import numpy as np
 
 from pandas import (
-    Categorical, DataFrame, MultiIndex, Series, TimeGrouper, Timestamp,
+    Categorical, DataFrame, MultiIndex, Series, Timestamp,
     date_range, period_range)
 import pandas.util.testing as tm
 
@@ -301,10 +300,6 @@ def setup(self):
     def time_multi_size(self):
         self.df.groupby(['key1', 'key2']).size()
 
-    def time_dt_timegrouper_size(self):
-        with warnings.catch_warnings(record=True):
-            self.df.groupby(TimeGrouper(key='dates', freq='M')).size()
-
     def time_category_size(self):
         self.draws.groupby(self.cats).size()
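The `time_multi_size` benchmark kept above measures `df.groupby(['key1', 'key2']).size()`, the count of rows per two-key group. A rough stdlib analogue (not pandas itself, and with made-up data) of that group-size count:

```python
from collections import Counter

# Stdlib sketch of a two-key group-size count, analogous in spirit to
# df.groupby(['key1', 'key2']).size(); rows here are invented examples.
rows = [('a', 1), ('a', 1), ('b', 2), ('a', 2)]
sizes = Counter(rows)  # maps each (key1, key2) pair to its group size
print(sizes[('a', 1)])  # 2
```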

asv_bench/benchmarks/index_object.py (24 additions, 1 deletion)

@@ -52,7 +52,6 @@ def time_is_dates_only(self):
 
 class Ops:
 
-    sample_time = 0.2
     params = ['float', 'int']
     param_names = ['dtype']
 
@@ -95,6 +94,12 @@ def time_min(self):
     def time_min_trivial(self):
         self.idx_inc.min()
 
+    def time_get_loc_inc(self):
+        self.idx_inc.get_loc(900000)
+
+    def time_get_loc_dec(self):
+        self.idx_dec.get_loc(100000)
+
 
 class IndexAppend:
 
@@ -191,8 +196,26 @@ def setup(self, N):
         self.intv = IntervalIndex.from_arrays(left, right)
         self.intv._engine
 
+        self.intv2 = IntervalIndex.from_arrays(left + 1, right + 1)
+        self.intv2._engine
+
+        self.left = IntervalIndex.from_breaks(np.arange(N))
+        self.right = IntervalIndex.from_breaks(np.arange(N - 3, 2 * N - 3))
+
     def time_monotonic_inc(self, N):
         self.intv.is_monotonic_increasing
 
+    def time_is_unique(self, N):
+        self.intv.is_unique
+
+    def time_intersection(self, N):
+        self.left.intersection(self.right)
+
+    def time_intersection_one_duplicate(self, N):
+        self.intv.intersection(self.right)
+
+    def time_intersection_both_duplicate(self, N):
+        self.intv.intersection(self.intv2)
+
 
 from .pandas_vb_common import setup  # noqa: F401
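The new `time_get_loc_inc`/`time_get_loc_dec` benchmarks exercise position lookup on monotonic indexes. On a sorted sequence that lookup reduces to binary search; a stdlib sketch of that cheap path (pandas' `Index.get_loc` handles many more cases, including non-monotonic and non-unique indexes):

```python
import bisect

# Binary-search position lookup on a monotonically increasing sequence,
# the fast path available to get_loc when the index is sorted.
idx = list(range(1_000_000))
pos = bisect.bisect_left(idx, 900_000)
print(pos)  # 900000
```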

asv_bench/benchmarks/io/csv.py (56 additions, 2 deletions)

@@ -3,7 +3,7 @@
 
 import numpy as np
 import pandas.util.testing as tm
-from pandas import DataFrame, Categorical, date_range, read_csv
+from pandas import DataFrame, Categorical, date_range, read_csv, to_datetime
 from pandas.io.parsers import _parser_defaults
 from io import StringIO
 
@@ -96,6 +96,35 @@ def time_read_csv(self, infer_datetime_format, format):
                  infer_datetime_format=infer_datetime_format)
 
 
+class ReadCSVConcatDatetime(StringIORewind):
+
+    iso8601 = '%Y-%m-%d %H:%M:%S'
+
+    def setup(self):
+        rng = date_range('1/1/2000', periods=50000, freq='S')
+        self.StringIO_input = StringIO('\n'.join(
+            rng.strftime(self.iso8601).tolist()))
+
+    def time_read_csv(self):
+        read_csv(self.data(self.StringIO_input),
+                 header=None, names=['foo'], parse_dates=['foo'],
+                 infer_datetime_format=False)
+
+
+class ReadCSVConcatDatetimeBadDateValue(StringIORewind):
+
+    params = (['nan', '0', ''],)
+    param_names = ['bad_date_value']
+
+    def setup(self, bad_date_value):
+        self.StringIO_input = StringIO(('%s,\n' % bad_date_value) * 50000)
+
+    def time_read_csv(self, bad_date_value):
+        read_csv(self.data(self.StringIO_input),
+                 header=None, names=['foo', 'bar'], parse_dates=['foo'],
+                 infer_datetime_format=False)
+
+
 class ReadCSVSkipRows(BaseIO):
 
     fname = '__test__.csv'
@@ -273,7 +302,7 @@ def mem_parser_chunks(self):
 
 class ReadCSVParseSpecialDate(StringIORewind):
     params = (['mY', 'mdY', 'hm'],)
-    params_name = ['value']
+    param_names = ['value']
     objects = {
         'mY': '01-2019\n10-2019\n02/2000\n',
         'mdY': '12/02/2010\n',
@@ -290,4 +319,29 @@ def time_read_special_date(self, value):
                  names=['Date'], parse_dates=['Date'])
 
 
+class ParseDateComparison(StringIORewind):
+    params = ([False, True],)
+    param_names = ['cache_dates']
+
+    def setup(self, cache_dates):
+        count_elem = 10000
+        data = '12-02-2010\n' * count_elem
+        self.StringIO_input = StringIO(data)
+
+    def time_read_csv_dayfirst(self, cache_dates):
+        read_csv(self.data(self.StringIO_input), sep=',', header=None,
+                 names=['Date'], parse_dates=['Date'], cache_dates=cache_dates,
+                 dayfirst=True)
+
+    def time_to_datetime_dayfirst(self, cache_dates):
+        df = read_csv(self.data(self.StringIO_input),
+                      dtype={'date': str}, names=['date'])
+        to_datetime(df['date'], cache=cache_dates, dayfirst=True)
+
+    def time_to_datetime_format_DD_MM_YYYY(self, cache_dates):
+        df = read_csv(self.data(self.StringIO_input),
+                      dtype={'date': str}, names=['date'])
+        to_datetime(df['date'], cache=cache_dates, format='%d-%m-%Y')
+
+
 from ..pandas_vb_common import setup  # noqa: F401
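The `ParseDateComparison` class above times parsing of the ambiguous string `'12-02-2010'` with `dayfirst=True` versus an explicit `format='%d-%m-%Y'`. The explicit-format reading can be sketched with the stdlib alone (no pandas required for the sketch):

```python
from datetime import datetime

# Explicit-format parse of the ambiguous date string used in the
# ParseDateComparison benchmark: %d-%m-%Y reads it as 12 Feb 2010,
# the same interpretation dayfirst=True is meant to produce.
parsed = datetime.strptime('12-02-2010', '%d-%m-%Y')
print(parsed.date())  # 2010-02-12
```

Supplying an explicit format removes the per-string guessing that `dayfirst` heuristics require, which is the performance difference the benchmark measures.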

asv_bench/benchmarks/io/parsers.py (38 additions, 0 deletions)

@@ -0,0 +1,38 @@
+import numpy as np
+
+try:
+    from pandas._libs.tslibs.parsing import (
+        _concat_date_cols, _does_string_look_like_datetime)
+except ImportError:
+    # Avoid whole benchmark suite import failure on asv (currently 0.4)
+    pass
+
+
+class DoesStringLookLikeDatetime(object):
+
+    params = (['2Q2005', '0.0', '10000'],)
+    param_names = ['value']
+
+    def setup(self, value):
+        self.objects = [value] * 1000000
+
+    def time_check_datetimes(self, value):
+        for obj in self.objects:
+            _does_string_look_like_datetime(obj)
+
+
+class ConcatDateCols(object):
+
+    params = ([1234567890, 'AAAA'], [1, 2])
+    param_names = ['value', 'dim']
+
+    def setup(self, value, dim):
+        count_elem = 10000
+        if dim == 1:
+            self.object = (np.array([value] * count_elem),)
+        if dim == 2:
+            self.object = (np.array([value] * count_elem),
+                           np.array([value] * count_elem))
+
+    def time_check_concat(self, value, dim):
+        _concat_date_cols(self.object)
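The `try`/`except ImportError` at the top of the new file keeps the whole benchmark suite importable even when the private pandas internals it targets are missing. A self-contained sketch of the same guard pattern (the module and function names below are hypothetical, chosen only for illustration):

```python
# Guarded import with a fallback: if the optional fast path is missing,
# degrade gracefully instead of failing at import time.
try:
    from some_private_module import fast_parse  # hypothetical module
except ImportError:
    fast_parse = None  # fall back to a plain implementation


def parse(value):
    if fast_parse is not None:
        return fast_parse(value)
    return value.strip()  # simple fallback path


print(parse('  2Q2005  '))  # 2Q2005
```

In the benchmark file the `except` branch just does `pass`, so the benchmarks themselves fail at call time rather than poisoning the suite import on older asv versions.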

asv_bench/benchmarks/multiindex_object.py (15 additions, 1 deletion)

@@ -2,7 +2,7 @@
 
 import numpy as np
 import pandas.util.testing as tm
-from pandas import date_range, MultiIndex
+from pandas import date_range, MultiIndex, DataFrame
 
 
 class GetLoc:
@@ -126,4 +126,18 @@ def time_datetime_level_values_sliced(self, mi):
         mi[:10].values
 
 
+class CategoricalLevel:
+
+    def setup(self):
+
+        self.df = DataFrame({
+            'a': np.arange(1_000_000, dtype=np.int32),
+            'b': np.arange(1_000_000, dtype=np.int64),
+            'c': np.arange(1_000_000, dtype=float),
+        }).astype({'a': 'category', 'b': 'category'})
+
+    def time_categorical_level(self):
+        self.df.set_index(['a', 'b'])
+
+
 from .pandas_vb_common import setup  # noqa: F401
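`CategoricalLevel` above benchmarks `set_index` on category-dtype columns. A categorical column stores each distinct value once plus a small integer code per row; a stdlib sketch of that encoding (a deliberate simplification of pandas' internal representation):

```python
# Dictionary-encode a column: unique values stored once, plus an
# integer code per row pointing into them. Example data is invented.
values = ['a', 'b', 'a', 'c', 'b', 'a']
categories = sorted(set(values))               # ['a', 'b', 'c']
codes = [categories.index(v) for v in values]  # [0, 1, 0, 2, 1, 0]
print(categories, codes)
```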

asv_bench/benchmarks/rolling.py (0 additions, 6 deletions)

@@ -4,7 +4,6 @@
 
 class Methods:
 
-    sample_time = 0.2
     params = (['DataFrame', 'Series'],
               [10, 1000],
               ['int', 'float'],
@@ -23,7 +22,6 @@ def time_rolling(self, constructor, window, dtype, method):
 
 class ExpandingMethods:
 
-    sample_time = 0.2
     params = (['DataFrame', 'Series'],
               ['int', 'float'],
               ['median', 'mean', 'max', 'min', 'std', 'count', 'skew', 'kurt',
@@ -41,7 +39,6 @@ def time_expanding(self, constructor, dtype, method):
 
 class EWMMethods:
 
-    sample_time = 0.2
     params = (['DataFrame', 'Series'],
               [10, 1000],
               ['int', 'float'],
@@ -58,7 +55,6 @@ def time_ewm(self, constructor, window, dtype, method):
 
 
 class VariableWindowMethods(Methods):
-    sample_time = 0.2
     params = (['DataFrame', 'Series'],
               ['50s', '1h', '1d'],
               ['int', 'float'],
@@ -75,7 +71,6 @@ def setup(self, constructor, window, dtype, method):
 
 class Pairwise:
 
-    sample_time = 0.2
     params = ([10, 1000, None],
               ['corr', 'cov'],
               [True, False])
@@ -95,7 +90,6 @@ def time_pairwise(self, window, method, pairwise):
 
 
 class Quantile:
-    sample_time = 0.2
     params = (['DataFrame', 'Series'],
               [10, 1000],
               ['int', 'float'],

0 commit comments