pandas-dev
diff --git a/‎.pre-commit-config.yaml
Lines changed: 30 additions & 0 deletions b/‎.pre-commit-config.yaml
Lines changed: 30 additions & 0 deletions
diff --git a/‎ci/code_checks.sh
Lines changed: 0 additions & 23 deletions b/‎ci/code_checks.sh
Lines changed: 0 additions & 23 deletions
diff --git a/‎doc/source/user_guide/indexing.rst
Lines changed: 11 additions & 0 deletions b/‎doc/source/user_guide/indexing.rst
Lines changed: 11 additions & 0 deletions
diff --git a/‎doc/source/whatsnew/v1.1.5.rst
Lines changed: 1 addition & 1 deletion b/‎doc/source/whatsnew/v1.1.5.rst
Lines changed: 1 addition & 1 deletion
diff --git a/‎doc/source/whatsnew/v1.2.0.rst
Lines changed: 7 additions & 2 deletions b/‎doc/source/whatsnew/v1.2.0.rst
Lines changed: 7 additions & 2 deletions
diff --git a/‎pandas/_libs/join.pyx
Lines changed: 1 addition & 1 deletion b/‎pandas/_libs/join.pyx
Lines changed: 1 addition & 1 deletion
diff --git a/‎pandas/_libs/parsers.pyx
Lines changed: 14 additions & 108 deletions b/‎pandas/_libs/parsers.pyx
Lines changed: 14 additions & 108 deletions
@@ -119,6 +119,36 @@ repos:
         entry: python scripts/validate_unwanted_patterns.py --validation-type="private_function_across_module"
         types: [python]
         exclude: ^(asv_bench|pandas/tests|doc)/
+    -   id: FrameOrSeriesUnion
+        name: Check for use of Union[Series, DataFrame] instead of FrameOrSeriesUnion alias
+        entry: Union\[.*(Series.*DataFrame|DataFrame.*Series).*\]
+        language: pygrep
+        types: [python]
+        exclude: ^pandas/_typing\.py$
+    -   id: type-not-class
+        name: Check for use of foo.__class__ instead of type(foo)
+        entry: \.__class__
+        language: pygrep
+        files: \.(py|pyx)$
+    -   id: unwanted-typing
+        name: Check for use of comment-based annotation syntax and missing error codes
+        entry: |
+            (?x)
+            \#\ type:\ (?!ignore)|
+            \#\ type:\s?ignore(?!\[)
+        language: pygrep
+        types: [python]
+    -   id: no-os-remove
+        name: Check code for instances of os.remove
+        entry: os\.remove
+        language: pygrep
+        types: [python]
+        files: ^pandas/tests/
+        exclude: |
+            (?x)^
+            pandas/tests/io/excel/test_writers\.py|
+            pandas/tests/io/pytables/common\.py|
+            pandas/tests/io/pytables/test_store\.py$
 -   repo: https://github.com/asottile/yesqa
     rev: v1.2.2
     hooks:
 
@@ -122,29 +122,6 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then
     RET=$(($RET + $?)) ; echo $MSG "DONE"
 
     # -------------------------------------------------------------------------
-    # Type annotations
-
-    MSG='Check for use of comment-based annotation syntax' ; echo $MSG
-    invgrep -R --include="*.py" -P '# type: (?!ignore)' pandas
-    RET=$(($RET + $?)) ; echo $MSG "DONE"
-
-    MSG='Check for missing error codes with # type: ignore' ; echo $MSG
-    invgrep -R --include="*.py" -P '# type:\s?ignore(?!\[)' pandas
-    RET=$(($RET + $?)) ; echo $MSG "DONE"
-
-    MSG='Check for use of Union[Series, DataFrame] instead of FrameOrSeriesUnion alias' ; echo $MSG
-    invgrep -R --include="*.py" --exclude=_typing.py -E 'Union\[.*(Series.*DataFrame|DataFrame.*Series).*\]' pandas
-    RET=$(($RET + $?)) ; echo $MSG "DONE"
-
-    # -------------------------------------------------------------------------
-    MSG='Check for use of foo.__class__ instead of type(foo)' ; echo $MSG
-    invgrep -R --include=*.{py,pyx} '\.__class__' pandas
-    RET=$(($RET + $?)) ; echo $MSG "DONE"
-
-    MSG='Check code for instances of os.remove' ; echo $MSG
-    invgrep -R --include="*.py*" --exclude "common.py" --exclude "test_writers.py" --exclude "test_store.py" -E "os\.remove" pandas/tests/
-    RET=$(($RET + $?)) ; echo $MSG "DONE"
-
     MSG='Check for inconsistent use of pandas namespace in tests' ; echo $MSG
     for class in "Series" "DataFrame" "Index" "MultiIndex" "Timestamp" "Timedelta" "TimedeltaIndex" "DatetimeIndex" "Categorical"; do
         check_namespace ${class}
 
@@ -422,6 +422,17 @@ above example, ``s.loc[1:6]`` would raise ``KeyError``.
 For the rationale behind this behavior, see
 :ref:`Endpoints are inclusive <advanced.endpoints_are_inclusive>`.
 
+.. ipython:: python
+
+   s = pd.Series(list('abcdef'), index=[0, 3, 2, 5, 4, 2])
+   s.loc[3:5]
+
+Also, if the index has duplicate labels *and* either the start or the stop label is dupulicated,
+an error will be raised. For instance, in the above example, ``s.loc[2:5]`` would raise a ``KeyError``.
+
+For more information about duplicate labels, see
+:ref:`Duplicate Labels <duplicates>`.
+
 .. _indexing.integer:
 
 Selection by position
 
@@ -23,7 +23,7 @@ Fixed regressions
 
 Bug fixes
 ~~~~~~~~~
--
+- Bug in metadata propagation for ``groupby`` iterator (:issue:`37343`)
 -
 
 .. ---------------------------------------------------------------------------
 
@@ -231,6 +231,7 @@ Other enhancements
 - :class:`Window` now supports all Scipy window types in ``win_type`` with flexible keyword argument support (:issue:`34556`)
 - :meth:`testing.assert_index_equal` now has a ``check_order`` parameter that allows indexes to be checked in an order-insensitive manner (:issue:`37478`)
 - :meth:`Series.info` has been added, for compatibility with :meth:`DataFrame.info` (:issue:`5167`)
+- :func:`read_csv` supports memory-mapping for compressed files (:issue:`37621`)
 
 .. _whatsnew_120.api_breaking.python:
 
@@ -400,11 +401,13 @@ Datetimelike
 - Bug in :meth:`TimedeltaIndex.sum` and :meth:`Series.sum` with ``timedelta64`` dtype on an empty index or series returning ``NaT`` instead of ``Timedelta(0)`` (:issue:`31751`)
 - Bug in :meth:`DatetimeArray.shift` incorrectly allowing ``fill_value`` with a mismatched timezone (:issue:`37299`)
 - Bug in adding a :class:`BusinessDay` with nonzero ``offset`` to a non-scalar other (:issue:`37457`)
+- Bug in :func:`to_datetime` with a read-only array incorrectly raising (:issue:`34857`)
 
 Timedelta
 ^^^^^^^^^
 - Bug in :class:`TimedeltaIndex`, :class:`Series`, and :class:`DataFrame` floor-division with ``timedelta64`` dtypes and ``NaT`` in the denominator (:issue:`35529`)
 - Bug in parsing of ISO 8601 durations in :class:`Timedelta`, :meth:`pd.to_datetime` (:issue:`37159`, fixes :issue:`29773` and :issue:`36204`)
+- Bug in :func:`to_timedelta` with a read-only array incorrectly raising (:issue:`34857`)
 
 Timezones
 ^^^^^^^^^
@@ -434,7 +437,7 @@ Numeric
 Conversion
 ^^^^^^^^^^
 
--
+- Bug in :meth:`DataFrame.to_dict` with ``orient='records'`` now returns python native datetime objects for datetimelike columns (:issue:`21256`)
 -
 
 Strings
@@ -499,6 +502,7 @@ I/O
 - Bug in output rendering of complex numbers showing too many trailing zeros (:issue:`36799`)
 - Bug in :class:`HDFStore` threw a ``TypeError`` when exporting an empty :class:`DataFrame` with ``datetime64[ns, tz]`` dtypes with a fixed HDF5 store (:issue:`20594`)
 - Bug in :class:`HDFStore` was dropping timezone information when exporting :class:`Series` with ``datetime64[ns, tz]`` dtypes with a fixed HDF5 store (:issue:`20594`)
+- :func:`read_csv` was closing user-provided binary file handles when ``engine="c"`` and an ``encoding`` was requested (:issue:`36980`)
 - Bug in :meth:`DataFrame.to_hdf` was not dropping missing rows with ``dropna=True`` (:issue:`35719`)
 
 Plotting
@@ -545,6 +549,7 @@ Reshaping
 - Bug in :func:`join` returned a non deterministic level-order for the resulting :class:`MultiIndex` (:issue:`36910`)
 - Bug in :meth:`DataFrame.combine_first()` caused wrong alignment with dtype ``string`` and one level of ``MultiIndex`` containing only ``NA`` (:issue:`37591`)
 - Fixed regression in :func:`merge` on merging DatetimeIndex with empty DataFrame (:issue:`36895`)
+- Bug in :meth:`DataFrame.apply` not setting index of return value when ``func`` return type is ``dict`` (:issue:`37544`)
 
 Sparse
 ^^^^^^
@@ -567,7 +572,7 @@ Other
 - Bug in :meth:`DataFrame.replace` and :meth:`Series.replace` incorrectly raising ``AssertionError`` instead of ``ValueError`` when invalid parameter combinations are passed (:issue:`36045`)
 - Bug in :meth:`DataFrame.replace` and :meth:`Series.replace` with numeric values and string ``to_replace`` (:issue:`34789`)
 - Fixed bug in metadata propagation incorrectly copying DataFrame columns as metadata when the column name overlaps with the metadata name (:issue:`37037`)
-- Fixed metadata propagation in the :class:`Series.dt` and :class:`Series.str` accessors and :class:`DataFrame.duplicated` and :class:`DataFrame.stack` and :class:`DataFrame.unstack` and :class:`DataFrame.pivot` methods (:issue:`28283`)
+- Fixed metadata propagation in the :class:`Series.dt`, :class:`Series.str` accessors, :class:`DataFrame.duplicated`, :class:`DataFrame.stack`, :class:`DataFrame.unstack`, :class:`DataFrame.pivot`, :class:`DataFrame.append`, :class:`DataFrame.diff`, :class:`DataFrame.applymap` and :class:`DataFrame.update` methods (:issue:`28283`) (:issue:`37381`)
 - Bug in :meth:`Index.union` behaving differently depending on whether operand is a :class:`Index` or other list-like (:issue:`36384`)
 - Passing an array with 2 or more dimensions to the :class:`Series` constructor now raises the more specific ``ValueError``, from a bare ``Exception`` previously (:issue:`35744`)
 - Bug in ``accessor.DirNamesMixin``, where ``dir(obj)`` wouldn't show attributes defined on the instance (:issue:`37173`).
 
@@ -268,7 +268,7 @@ ctypedef fused join_t:
 
 @cython.wraparound(False)
 @cython.boundscheck(False)
-def left_join_indexer_unique(join_t[:] left, join_t[:] right):
+def left_join_indexer_unique(ndarray[join_t] left, ndarray[join_t] right):
     cdef:
         Py_ssize_t i, j, nleft, nright
         ndarray[int64_t] indexer
 
@@ -1,23 +1,18 @@
 # Copyright (c) 2012, Lambda Foundry, Inc.
 # See LICENSE for the license
-import bz2
 from csv import QUOTE_MINIMAL, QUOTE_NONE, QUOTE_NONNUMERIC
 from errno import ENOENT
-import gzip
-import io
-import os
 import sys
 import time
 import warnings
-import zipfile
 
 from libc.stdlib cimport free
 from libc.string cimport strcasecmp, strlen, strncpy
 
 import cython
 from cython import Py_ssize_t
 
-from cpython.bytes cimport PyBytes_AsString, PyBytes_FromString
+from cpython.bytes cimport PyBytes_AsString
 from cpython.exc cimport PyErr_Fetch, PyErr_Occurred
 from cpython.object cimport PyObject
 from cpython.ref cimport Py_XDECREF
@@ -67,7 +62,6 @@ from pandas._libs.khash cimport (
     khiter_t,
 )
 
-from pandas.compat import get_lzma_file, import_lzma
 from pandas.errors import DtypeWarning, EmptyDataError, ParserError, ParserWarning
 
 from pandas.core.dtypes.common import (
@@ -82,11 +76,10 @@ from pandas.core.dtypes.common import (
 )
 from pandas.core.dtypes.concat import union_categoricals
 
-lzma = import_lzma()
-
 cdef:
     float64_t INF = <float64_t>np.inf
     float64_t NEGINF = -INF
+    int64_t DEFAULT_CHUNKSIZE = 256 * 1024
 
 
 cdef extern from "headers/portable.h":
@@ -275,14 +268,15 @@ cdef extern from "parser/io.h":
                           size_t *bytes_read, int *status)
 
 
-DEFAULT_CHUNKSIZE = 256 * 1024
-
-
 cdef class TextReader:
     """
 
     # source: StringIO or file object
 
+    ..versionchange:: 1.2.0
+        removed 'compression', 'memory_map', and 'encoding' argument.
+        These arguments are outsourced to CParserWrapper.
+        'source' has to be a file handle.
     """
 
     cdef:
@@ -299,16 +293,14 @@ cdef class TextReader:
 
     cdef public:
         int64_t leading_cols, table_width, skipfooter, buffer_lines
-        bint allow_leading_cols, mangle_dupe_cols, memory_map, low_memory
+        bint allow_leading_cols, mangle_dupe_cols, low_memory
         bint delim_whitespace
         object delimiter, converters
         object na_values
         object header, orig_header, names, header_start, header_end
         object index_col
         object skiprows
         object dtype
-        object encoding
-        object compression
         object usecols
         list dtype_cast_order
         set unnamed_cols
@@ -321,18 +313,15 @@ cdef class TextReader:
                   header_end=0,
                   index_col=None,
                   names=None,
-                  bint memory_map=False,
                   tokenize_chunksize=DEFAULT_CHUNKSIZE,
                   bint delim_whitespace=False,
-                  compression=None,
                   converters=None,
                   bint skipinitialspace=False,
                   escapechar=None,
                   bint doublequote=True,
                   quotechar=b'"',
                   quoting=0,
                   lineterminator=None,
-                  encoding=None,
                   comment=None,
                   decimal=b'.',
                   thousands=None,
@@ -356,15 +345,7 @@ cdef class TextReader:
                   bint skip_blank_lines=True):
 
         # set encoding for native Python and C library
-        if encoding is not None:
-            if not isinstance(encoding, bytes):
-                encoding = encoding.encode('utf-8')
-            encoding = encoding.lower()
-            self.c_encoding = <char*>encoding
-        else:
-            self.c_encoding = NULL
-
-        self.encoding = encoding
+        self.c_encoding = NULL
 
         self.parser = parser_new()
         self.parser.chunksize = tokenize_chunksize
@@ -374,9 +355,6 @@ cdef class TextReader:
         # For timekeeping
         self.clocks = []
 
-        self.compression = compression
-        self.memory_map = memory_map
-
         self.parser.usecols = (usecols is not None)
 
         self._setup_parser_source(source)
@@ -562,11 +540,6 @@ cdef class TextReader:
         parser_del(self.parser)
 
     def close(self):
-        # we need to properly close an open derived
-        # filehandle here, e.g. and UTFRecoder
-        if self.handle is not None:
-            self.handle.close()
-
         # also preemptively free all allocated memory
         parser_free(self.parser)
         if self.true_set:
@@ -614,82 +587,15 @@ cdef class TextReader:
         cdef:
             void *ptr
 
-        self.parser.cb_io = NULL
-        self.parser.cb_cleanup = NULL
-
-        if self.compression:
-            if self.compression == 'gzip':
-                if isinstance(source, str):
-                    source = gzip.GzipFile(source, 'rb')
-                else:
-                    source = gzip.GzipFile(fileobj=source)
-            elif self.compression == 'bz2':
-                source = bz2.BZ2File(source, 'rb')
-            elif self.compression == 'zip':
-                zip_file = zipfile.ZipFile(source)
-                zip_names = zip_file.namelist()
-
-                if len(zip_names) == 1:
-                    file_name = zip_names.pop()
-                    source = zip_file.open(file_name)
-
-                elif len(zip_names) == 0:
-                    raise ValueError(f'Zero files found in compressed '
-                                     f'zip file {source}')
-                else:
-                    raise ValueError(f'Multiple files found in compressed '
-                                     f'zip file {zip_names}')
-            elif self.compression == 'xz':
-                if isinstance(source, str):
-                    source = get_lzma_file(lzma)(source, 'rb')
-                else:
-                    source = get_lzma_file(lzma)(filename=source)
-            else:
-                raise ValueError(f'Unrecognized compression type: '
-                                 f'{self.compression}')
-
-            if (self.encoding and hasattr(source, "read") and
-                    not hasattr(source, "encoding")):
-                source = io.TextIOWrapper(
-                    source, self.encoding.decode('utf-8'), newline='')
-
-                self.encoding = b'utf-8'
-                self.c_encoding = <char*>self.encoding
-
-            self.handle = source
-
-        if isinstance(source, str):
-            encoding = sys.getfilesystemencoding() or "utf-8"
-            usource = source
-            source = source.encode(encoding)
-
-            if self.memory_map:
-                ptr = new_mmap(source)
-                if ptr == NULL:
-                    # fall back
-                    ptr = new_file_source(source, self.parser.chunksize)
-                    self.parser.cb_io = &buffer_file_bytes
-                    self.parser.cb_cleanup = &del_file_source
-                else:
-                    self.parser.cb_io = &buffer_mmap_bytes
-                    self.parser.cb_cleanup = &del_mmap
-            else:
-                ptr = new_file_source(source, self.parser.chunksize)
-                self.parser.cb_io = &buffer_file_bytes
-                self.parser.cb_cleanup = &del_file_source
-            self.parser.source = ptr
-
-        elif hasattr(source, 'read'):
-            # e.g., StringIO
-
-            ptr = new_rd_source(source)
-            self.parser.source = ptr
-            self.parser.cb_io = &buffer_rd_bytes
-            self.parser.cb_cleanup = &del_rd_source
-        else:
+        if not hasattr(source, "read"):
             raise IOError(f'Expected file path name or file-like object, '
                           f'got {type(source)} type')
 
+        ptr = new_rd_source(source)
+        self.parser.source = ptr
+        self.parser.cb_io = &buffer_rd_bytes
+        self.parser.cb_cleanup = &del_rd_source
+
     cdef _get_header(self):
         # header is now a list of lists, so field_count should use header[0]
Original file line number	Diff line number	Diff line change
`@@ -23,7 +23,7 @@ Fixed regressions`
`23`	`23`
`24`	`24`	`Bug fixes`
`25`	`25`	`~~~~~~~~~`
`26`		`--`
	`26`	+- Bug in metadata propagation for ``groupby`` iterator (:issue:`37343`)
`27`	`27`	`-`
`28`	`28`
`29`	`29`	`.. ---------------------------------------------------------------------------`