Skip to content

Commit b043243

Browse files
committed
Merge remote-tracking branch 'upstream/master' into depr-sparse-depr
2 parents 30f3670 + ff4437e commit b043243

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

63 files changed

+343
-358
lines changed

asv_bench/benchmarks/io/csv.py

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
import numpy as np
55
import pandas.util.testing as tm
6-
from pandas import DataFrame, Categorical, date_range, read_csv
6+
from pandas import DataFrame, Categorical, date_range, read_csv, to_datetime
77
from pandas.io.parsers import _parser_defaults
88
from io import StringIO
99

@@ -302,7 +302,7 @@ def mem_parser_chunks(self):
302302

303303
class ReadCSVParseSpecialDate(StringIORewind):
304304
params = (['mY', 'mdY', 'hm'],)
305-
params_name = ['value']
305+
param_names = ['value']
306306
objects = {
307307
'mY': '01-2019\n10-2019\n02/2000\n',
308308
'mdY': '12/02/2010\n',
@@ -319,4 +319,29 @@ def time_read_special_date(self, value):
319319
names=['Date'], parse_dates=['Date'])
320320

321321

322+
class ParseDateComparison(StringIORewind):
323+
params = ([False, True],)
324+
param_names = ['cache_dates']
325+
326+
def setup(self, cache_dates):
327+
count_elem = 10000
328+
data = '12-02-2010\n' * count_elem
329+
self.StringIO_input = StringIO(data)
330+
331+
def time_read_csv_dayfirst(self, cache_dates):
332+
read_csv(self.data(self.StringIO_input), sep=',', header=None,
333+
names=['Date'], parse_dates=['Date'], cache_dates=cache_dates,
334+
dayfirst=True)
335+
336+
def time_to_datetime_dayfirst(self, cache_dates):
337+
df = read_csv(self.data(self.StringIO_input),
338+
dtype={'date': str}, names=['date'])
339+
to_datetime(df['date'], cache=cache_dates, dayfirst=True)
340+
341+
def time_to_datetime_format_DD_MM_YYYY(self, cache_dates):
342+
df = read_csv(self.data(self.StringIO_input),
343+
dtype={'date': str}, names=['date'])
344+
to_datetime(df['date'], cache=cache_dates, format='%d-%m-%Y')
345+
346+
322347
from ..pandas_vb_common import setup # noqa: F401

ci/azure/windows.yml

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -17,14 +17,10 @@ jobs:
1717
CONDA_PY: "37"
1818

1919
steps:
20-
- task: CondaEnvironment@1
21-
inputs:
22-
updateConda: no
23-
packageSpecs: ''
24-
25-
- script: |
26-
ci\\incremental\\setup_conda_environment.cmd
27-
displayName: 'Before Install'
20+
- powershell: Write-Host "##vso[task.prependpath]$env:CONDA\Scripts"
21+
displayName: Add conda to PATH
22+
- script: conda env create --file ci\\deps\\azure-windows-$(CONDA_PY).yaml
23+
displayName: Create anaconda environment
2824
- script: |
2925
call activate pandas-dev
3026
ci\\incremental\\build.cmd

ci/code_checks.sh

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -239,6 +239,10 @@ if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then
239239
pytest -q --doctest-modules pandas/core/groupby/groupby.py -k"-cumcount -describe -pipe"
240240
RET=$(($RET + $?)) ; echo $MSG "DONE"
241241

242+
MSG='Doctests datetimes.py' ; echo $MSG
243+
pytest -q --doctest-modules pandas/core/tools/datetimes.py
244+
RET=$(($RET + $?)) ; echo $MSG "DONE"
245+
242246
MSG='Doctests top-level reshaping functions' ; echo $MSG
243247
pytest -q --doctest-modules \
244248
pandas/core/reshape/concat.py \

ci/incremental/setup_conda_environment.cmd

Lines changed: 0 additions & 23 deletions
This file was deleted.

doc/source/whatsnew/v0.25.0.rst

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,37 @@ Providing any ``SparseSeries`` or ``SparseDataFrame`` to :func:`concat` will
154154
cause a ``SparseSeries`` or ``SparseDataFrame`` to be returned, as before.
155155

156156

157+
``DataFrame`` groupby ffill/bfill no longer return group labels
158+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
159+
160+
The methods ``ffill``, ``bfill``, ``pad`` and ``backfill`` of
161+
:class:`DataFrameGroupBy <pandas.core.groupby.DataFrameGroupBy>`
162+
previously included the group labels in the return value, which was
163+
inconsistent with other groupby transforms. Now only the filled values
164+
are returned. (:issue:`21521`)
165+
166+
.. ipython:: python
167+
168+
df = pd.DataFrame({"a": ["x", "y"], "b": [1, 2]})
169+
df
170+
171+
*Previous Behaviour*:
172+
173+
.. code-block:: python
174+
175+
In [3]: df.groupby("a").ffill()
176+
Out[3]:
177+
a b
178+
0 x 1
179+
1 y 2
180+
181+
*New Behaviour*:
182+
183+
.. ipython:: python
184+
185+
df.groupby("a").ffill()
186+
187+
157188
.. _whatsnew_0250.api_breaking.deps:
158189

159190
Increased minimum versions for dependencies
@@ -299,6 +330,7 @@ Timezones
299330
- Bug in :func:`DataFrame.update` when updating with timezone aware data would return timezone naive data (:issue:`25807`)
300331
- Bug in :func:`to_datetime` where an uninformative ``RuntimeError`` was raised when passing a naive :class:`Timestamp` with datetime strings with mixed UTC offsets (:issue:`25978`)
301332
- Bug in :func:`to_datetime` with ``unit='ns'`` would drop timezone information from the parsed argument (:issue:`26168`)
333+
- Bug in :func:`DataFrame.join` where joining a timezone aware index with a timezone aware column would result in a column of ``NaN`` (:issue:`26335`)
302334

303335
Numeric
304336
^^^^^^^
@@ -409,6 +441,7 @@ Groupby/Resample/Rolling
409441
- Bug in :meth:`pandas.core.groupby.GroupBy.idxmax` and :meth:`pandas.core.groupby.GroupBy.idxmin` with datetime column would return incorrect dtype (:issue:`25444`, :issue:`15306`)
410442
- Bug in :meth:`pandas.core.groupby.GroupBy.cumsum`, :meth:`pandas.core.groupby.GroupBy.cumprod`, :meth:`pandas.core.groupby.GroupBy.cummin` and :meth:`pandas.core.groupby.GroupBy.cummax` with categorical column having absent categories, would return incorrect result or segfault (:issue:`16771`)
411443
- Bug in :meth:`pandas.core.groupby.GroupBy.nth` where NA values in the grouping would return incorrect results (:issue:`26011`)
444+
- Bug in :meth:`pandas.core.groupby.SeriesGroupBy.transform` where transforming an empty group would raise error (:issue:`26208`)
412445

413446

414447
Reshaping

pandas/_libs/parsers.pyx

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -41,15 +41,14 @@ import pandas._libs.lib as lib
4141
from pandas._libs.khash cimport (
4242
khiter_t,
4343
kh_str_t, kh_init_str, kh_put_str, kh_exist_str,
44-
kh_get_str, kh_destroy_str, kh_resize_str,
44+
kh_get_str, kh_destroy_str,
4545
kh_float64_t, kh_get_float64, kh_destroy_float64,
4646
kh_put_float64, kh_init_float64, kh_resize_float64,
4747
kh_strbox_t, kh_put_strbox, kh_get_strbox, kh_init_strbox,
4848
kh_destroy_strbox,
4949
kh_str_starts_t, kh_put_str_starts_item, kh_init_str_starts,
5050
kh_get_str_starts_item, kh_destroy_str_starts, kh_resize_str_starts)
5151

52-
import pandas.compat as compat
5352
from pandas.core.dtypes.common import (
5453
is_categorical_dtype,
5554
is_integer_dtype, is_float_dtype,
@@ -477,14 +476,19 @@ cdef class TextReader:
477476

478477
self.verbose = verbose
479478
self.low_memory = low_memory
480-
self.parser.double_converter_nogil = xstrtod
481-
self.parser.double_converter_withgil = NULL
482-
if float_precision == 'high':
483-
self.parser.double_converter_nogil = precise_xstrtod
484-
self.parser.double_converter_withgil = NULL
485-
elif float_precision == 'round_trip': # avoid gh-15140
479+
480+
if float_precision == "round_trip":
481+
# see gh-15140
482+
#
483+
# Our current roundtrip implementation requires the GIL.
486484
self.parser.double_converter_nogil = NULL
487485
self.parser.double_converter_withgil = round_trip
486+
elif float_precision == "high":
487+
self.parser.double_converter_withgil = NULL
488+
self.parser.double_converter_nogil = precise_xstrtod
489+
else:
490+
self.parser.double_converter_withgil = NULL
491+
self.parser.double_converter_nogil = xstrtod
488492

489493
if isinstance(dtype, dict):
490494
dtype = {k: pandas_dtype(dtype[k])

pandas/compat/__init__.py

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,6 @@
44
55
Cross-compatible functions for different versions of Python.
66
7-
Key items to import for compatible code:
8-
* lists: lrange()
9-
107
Other items:
118
* platform checker
129
"""
@@ -19,11 +16,6 @@
1916
PYPY = platform.python_implementation() == 'PyPy'
2017

2118

22-
# list-producing versions of the major Python iterating functions
23-
def lrange(*args, **kwargs):
24-
return list(range(*args, **kwargs))
25-
26-
2719
# ----------------------------------------------------------------------------
2820
# functions largely based / taken from the six module
2921

pandas/core/groupby/generic.py

Lines changed: 6 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -916,8 +916,12 @@ def transform(self, func, *args, **kwargs):
916916
s = klass(res, indexer)
917917
results.append(s)
918918

919-
from pandas.core.reshape.concat import concat
920-
result = concat(results).sort_index()
919+
# check for empty "results" to avoid concat ValueError
920+
if results:
921+
from pandas.core.reshape.concat import concat
922+
result = concat(results).sort_index()
923+
else:
924+
result = Series()
921925

922926
# we will only try to coerce the result type if
923927
# we have a numeric dtype, as these are *always* udfs
@@ -1480,15 +1484,6 @@ def _apply_to_column_groupbys(self, func):
14801484
in self._iterate_column_groupbys()),
14811485
keys=self._selected_obj.columns, axis=1)
14821486

1483-
def _fill(self, direction, limit=None):
1484-
"""Overridden method to join grouped columns in output"""
1485-
res = super()._fill(direction, limit=limit)
1486-
output = OrderedDict(
1487-
(grp.name, grp.grouper) for grp in self.grouper.groupings)
1488-
1489-
from pandas import concat
1490-
return concat((self._wrap_transformed_output(output), res), axis=1)
1491-
14921487
def count(self):
14931488
"""
14941489
Compute count of group, excluding missing values.

pandas/core/groupby/groupby.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2235,7 +2235,6 @@ def pct_change(self, periods=1, fill_method='pad', limit=None, freq=None,
22352235
limit=limit, freq=freq,
22362236
axis=axis))
22372237
filled = getattr(self, fill_method)(limit=limit)
2238-
filled = filled.drop(self.grouper.names, axis=1)
22392238
fill_grp = filled.groupby(self.grouper.labels)
22402239
shifted = fill_grp.shift(periods=periods, freq=freq)
22412240
return (filled / shifted) - 1

pandas/core/reshape/merge.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1674,8 +1674,8 @@ def _right_outer_join(x, y, max_groups):
16741674
def _factorize_keys(lk, rk, sort=True):
16751675
# Some pre-processing for non-ndarray lk / rk
16761676
if is_datetime64tz_dtype(lk) and is_datetime64tz_dtype(rk):
1677-
lk = lk._data
1678-
rk = rk._data
1677+
lk = getattr(lk, '_values', lk)._data
1678+
rk = getattr(rk, '_values', rk)._data
16791679

16801680
elif (is_categorical_dtype(lk) and
16811681
is_categorical_dtype(rk) and

pandas/core/tools/datetimes.py

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -525,8 +525,8 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False,
525525
'ms', 'us', 'ns']) or plurals of the same
526526
527527
>>> df = pd.DataFrame({'year': [2015, 2016],
528-
'month': [2, 3],
529-
'day': [4, 5]})
528+
... 'month': [2, 3],
529+
... 'day': [4, 5]})
530530
>>> pd.to_datetime(df)
531531
0 2015-02-04
532532
1 2016-03-05
@@ -548,8 +548,7 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False,
548548
Passing infer_datetime_format=True can often-times speedup a parsing
549549
if its not an ISO8601 format exactly, but in a regular format.
550550
551-
>>> s = pd.Series(['3/11/2000', '3/12/2000', '3/13/2000']*1000)
552-
551+
>>> s = pd.Series(['3/11/2000', '3/12/2000', '3/13/2000'] * 1000)
553552
>>> s.head()
554553
0 3/11/2000
555554
1 3/12/2000
@@ -558,10 +557,10 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False,
558557
4 3/12/2000
559558
dtype: object
560559
561-
>>> %timeit pd.to_datetime(s,infer_datetime_format=True)
560+
>>> %timeit pd.to_datetime(s,infer_datetime_format=True) # doctest: +SKIP
562561
100 loops, best of 3: 10.4 ms per loop
563562
564-
>>> %timeit pd.to_datetime(s,infer_datetime_format=False)
563+
>>> %timeit pd.to_datetime(s,infer_datetime_format=False) # doctest: +SKIP
565564
1 loop, best of 3: 471 ms per loop
566565
567566
Using a unix epoch time
@@ -577,10 +576,9 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False,
577576
Using a non-unix epoch origin
578577
579578
>>> pd.to_datetime([1, 2, 3], unit='D',
580-
origin=pd.Timestamp('1960-01-01'))
581-
0 1960-01-02
582-
1 1960-01-03
583-
2 1960-01-04
579+
... origin=pd.Timestamp('1960-01-01'))
580+
DatetimeIndex(['1960-01-02', '1960-01-03', '1960-01-04'], \
581+
dtype='datetime64[ns]', freq=None)
584582
"""
585583
if arg is None:
586584
return None

pandas/plotting/_converter.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@
1212
from pandas._libs import lib, tslibs
1313
from pandas._libs.tslibs import resolution
1414
from pandas._libs.tslibs.frequencies import FreqGroup, get_freq
15-
from pandas.compat import lrange
1615

1716
from pandas.core.dtypes.common import (
1817
is_datetime64_ns_dtype, is_float, is_float_dtype, is_integer,
@@ -1029,7 +1028,7 @@ def __call__(self):
10291028
base = self.base
10301029
(d, m) = divmod(vmin, base)
10311030
vmin = (d + 1) * base
1032-
locs = lrange(vmin, vmax + 1, base)
1031+
locs = list(range(vmin, vmax + 1, base))
10331032
return locs
10341033

10351034
def autoscale(self):

pandas/plotting/_core.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88

99
from pandas._config import get_option
1010

11-
from pandas.compat import lrange
1211
from pandas.errors import AbstractMethodError
1312
from pandas.util._decorators import Appender, cache_readonly
1413

@@ -583,9 +582,9 @@ def _get_xticks(self, convert_period=False):
583582
x = self.data.index._mpl_repr()
584583
else:
585584
self._need_to_set_index = True
586-
x = lrange(len(index))
585+
x = list(range(len(index)))
587586
else:
588-
x = lrange(len(index))
587+
x = list(range(len(index)))
589588

590589
return x
591590

pandas/plotting/_misc.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
# being a bit too dynamic
22
import numpy as np
33

4-
from pandas.compat import lrange
54
from pandas.util._decorators import deprecate_kwarg
65

76
from pandas.core.dtypes.missing import notna
@@ -81,8 +80,8 @@ def scatter_matrix(frame, alpha=0.5, figsize=None, ax=None, grid=False,
8180
rdelta_ext = (rmax_ - rmin_) * range_padding / 2.
8281
boundaries_list.append((rmin_ - rdelta_ext, rmax_ + rdelta_ext))
8382

84-
for i, a in zip(lrange(n), df.columns):
85-
for j, b in zip(lrange(n), df.columns):
83+
for i, a in enumerate(df.columns):
84+
for j, b in enumerate(df.columns):
8685
ax = axes[i, j]
8786

8887
if i == j:
@@ -420,7 +419,7 @@ def bootstrap_plot(series, fig=None, size=50, samples=500, **kwds):
420419
for sampling in samplings])
421420
if fig is None:
422421
fig = plt.figure()
423-
x = lrange(samples)
422+
x = list(range(samples))
424423
axes = []
425424
ax1 = fig.add_subplot(2, 3, 1)
426425
ax1.set_xlabel("Sample")
@@ -532,7 +531,7 @@ def parallel_coordinates(frame, class_column, cols=None, ax=None, color=None,
532531
raise ValueError('Length of xticks must match number of columns')
533532
x = xticks
534533
else:
535-
x = lrange(ncols)
534+
x = list(range(ncols))
536535

537536
if ax is None:
538537
ax = plt.gca()

0 commit comments

Comments
 (0)