From ce0efe86c5a3c261b91ad36a4b550816d7393994 Mon Sep 17 00:00:00 2001 From: patrick <61934744+phofl@users.noreply.github.com> Date: Sun, 29 Nov 2020 16:59:10 +0100 Subject: [PATCH 01/15] BUG: Series.at returning Series with one element instead of scalar (#38101) --- doc/source/whatsnew/v1.2.0.rst | 1 + pandas/core/indexes/multi.py | 4 ++ pandas/tests/indexing/test_scalar.py | 70 +++++++++++++++------------- 3 files changed, 43 insertions(+), 32 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 6aff4f4bd41e2..f53cde7fac068 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -631,6 +631,7 @@ Indexing - Bug in :meth:`MultiIndex.drop` does not raise if labels are partially found (:issue:`37820`) - Bug in :meth:`DataFrame.loc` did not raise ``KeyError`` when missing combination was given with ``slice(None)`` for remaining levels (:issue:`19556`) - Bug in :meth:`DataFrame.loc` raising ``TypeError`` when non-integer slice was given to select values from :class:`MultiIndex` (:issue:`25165`, :issue:`24263`) +- Bug in :meth:`Series.at` returning :class:`Series` with one element instead of scalar when index is a :class:`MultiIndex` with one level (:issue:`38053`) - Bug in :meth:`DataFrame.loc` returning and assigning elements in wrong order when indexer is differently ordered than the :class:`MultiIndex` to filter (:issue:`31330`, :issue:`34603`) - Bug in :meth:`DataFrame.loc` and :meth:`DataFrame.__getitem__` raising ``KeyError`` when columns were :class:`MultiIndex` with only one level (:issue:`29749`) - Bug in :meth:`Series.__getitem__` and :meth:`DataFrame.__getitem__` raising blank ``KeyError`` without missing keys for :class:`IntervalIndex` (:issue:`27365`) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 9b4b459d9a122..dacd802b21e63 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2530,6 +2530,10 @@ def _get_values_for_loc(self, series: "Series", loc, key): if is_scalar(loc): return new_values + if len(new_values) == 1 and not self.nlevels > 1: + # If more than one level left, we can not return a scalar + return new_values[0] + new_index = self[loc] new_index = maybe_droplevels(new_index, key) new_ser = series._constructor(new_values, index=new_index, name=series.name) diff --git a/pandas/tests/indexing/test_scalar.py b/pandas/tests/indexing/test_scalar.py index dd01f4e6a4f49..ce48fd1e5c905 100644 --- a/pandas/tests/indexing/test_scalar.py +++ b/pandas/tests/indexing/test_scalar.py @@ -268,35 +268,41 @@ def test_at_with_tuple_index_set(): assert series.at[1, 2] == 3 -def test_multiindex_at_get(): - # GH 26989 - # DataFrame.at and DataFrame.loc getter works with MultiIndex - df = DataFrame({"a": [1, 2]}, index=[[1, 2], [3, 4]]) - assert df.index.nlevels == 2 - assert df.at[(1, 3), "a"] == 1 - assert df.loc[(1, 3), "a"] == 1 - - # Series.at and Series.loc getter works with MultiIndex - series = df["a"] - assert series.index.nlevels == 2 - assert series.at[1, 3] == 1 - assert series.loc[1, 3] == 1 - - -def test_multiindex_at_set(): - # GH 26989 - # DataFrame.at and DataFrame.loc setter works with MultiIndex - df = DataFrame({"a": [1, 2]}, index=[[1, 2], [3, 4]]) - assert df.index.nlevels == 2 - df.at[(1, 3), "a"] = 3 - assert df.at[(1, 3), "a"] == 3 - df.loc[(1, 3), "a"] = 4 - assert df.loc[(1, 3), "a"] == 4 - - # Series.at and Series.loc setter works with MultiIndex - series = df["a"] - assert series.index.nlevels == 2 - series.at[1, 3] = 5 - assert 
series.at[1, 3] == 5 - series.loc[1, 3] = 6 - assert series.loc[1, 3] == 6 +class TestMultiIndexScalar: + def test_multiindex_at_get(self): + # GH 26989 + # DataFrame.at and DataFrame.loc getter works with MultiIndex + df = DataFrame({"a": [1, 2]}, index=[[1, 2], [3, 4]]) + assert df.index.nlevels == 2 + assert df.at[(1, 3), "a"] == 1 + assert df.loc[(1, 3), "a"] == 1 + + # Series.at and Series.loc getter works with MultiIndex + series = df["a"] + assert series.index.nlevels == 2 + assert series.at[1, 3] == 1 + assert series.loc[1, 3] == 1 + + def test_multiindex_at_set(self): + # GH 26989 + # DataFrame.at and DataFrame.loc setter works with MultiIndex + df = DataFrame({"a": [1, 2]}, index=[[1, 2], [3, 4]]) + assert df.index.nlevels == 2 + df.at[(1, 3), "a"] = 3 + assert df.at[(1, 3), "a"] == 3 + df.loc[(1, 3), "a"] = 4 + assert df.loc[(1, 3), "a"] == 4 + + # Series.at and Series.loc setter works with MultiIndex + series = df["a"] + assert series.index.nlevels == 2 + series.at[1, 3] = 5 + assert series.at[1, 3] == 5 + series.loc[1, 3] = 6 + assert series.loc[1, 3] == 6 + + def test_multiindex_at_get_one_level(self): + # GH#38053 + s2 = Series((0, 1), index=[[False, True]]) + result = s2.at[False] + assert result == 0 From 22007d3fd294033132be4fffb0fbdd10c8dd46de Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 29 Nov 2020 07:59:56 -0800 Subject: [PATCH 02/15] API: CategoricalIndex.append fallback to concat_compat (#38098) --- doc/source/whatsnew/v1.2.0.rst | 2 ++ pandas/core/indexes/base.py | 6 ---- pandas/core/indexes/category.py | 20 +++++++----- pandas/core/indexes/multi.py | 4 --- pandas/core/reshape/pivot.py | 31 +++++-------------- .../indexes/categorical/test_category.py | 8 ++--- pandas/tests/indexing/test_categorical.py | 9 ++++-- .../tests/reshape/concat/test_categorical.py | 14 ++++++--- 8 files changed, 41 insertions(+), 53 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index f53cde7fac068..501e2878ab135 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -454,6 +454,7 @@ Other API changes - Passing an invalid ``fill_value`` to :meth:`Series.shift` with a ``CategoricalDtype`` now raises a ``TypeError`` instead of a ``ValueError`` (:issue:`37733`) - Passing an invalid value to :meth:`IntervalIndex.insert` or :meth:`CategoricalIndex.insert` now raises a ``TypeError`` instead of a ``ValueError`` (:issue:`37733`) - Attempting to reindex a Series with a :class:`CategoricalIndex` with an invalid ``fill_value`` now raises a ``TypeError`` instead of a ``ValueError`` (:issue:`37733`) +- :meth:`CategoricalIndex.append` with an index that contains non-category values will now cast instead of raising ``TypeError`` (:issue:`38098`) .. 
--------------------------------------------------------------------------- @@ -635,6 +636,7 @@ Indexing - Bug in :meth:`DataFrame.loc` returning and assigning elements in wrong order when indexer is differently ordered than the :class:`MultiIndex` to filter (:issue:`31330`, :issue:`34603`) - Bug in :meth:`DataFrame.loc` and :meth:`DataFrame.__getitem__` raising ``KeyError`` when columns were :class:`MultiIndex` with only one level (:issue:`29749`) - Bug in :meth:`Series.__getitem__` and :meth:`DataFrame.__getitem__` raising blank ``KeyError`` without missing keys for :class:`IntervalIndex` (:issue:`27365`) +- Bug in setting a new label on a :class:`DataFrame` or :class:`Series` with a :class:`CategoricalIndex` incorrectly raising ``TypeError`` when the new label is not among the index's categories (:issue:`38098`) Missing ^^^^^^^ diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index c49f3f9457161..c86652acbcd0f 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4180,12 +4180,6 @@ def _coerce_scalar_to_index(self, item): return Index([item], dtype=dtype, **self._get_attributes_dict()) - def _to_safe_for_reshape(self): - """ - Convert to object if we are a categorical. - """ - return self - def _validate_fill_value(self, value): """ Check if the value can be inserted into our array, and convert diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 7956b3a623333..abf70fd150345 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -399,10 +399,6 @@ def unique(self, level=None): # of result, not self. return type(self)._simple_new(result, name=self.name) - def _to_safe_for_reshape(self): - """ convert to object if we are a categorical """ - return self.astype("object") - def reindex(self, target, method=None, level=None, limit=None, tolerance=None): """ Create index with target's values (move/add/delete values as necessary) @@ -637,11 +633,19 @@ def map(self, mapper): mapped = self._values.map(mapper) return Index(mapped, name=self.name) - def _concat(self, to_concat: List["Index"], name: Label) -> "CategoricalIndex": + def _concat(self, to_concat: List["Index"], name: Label) -> Index: # if calling index is category, don't check dtype of others - codes = np.concatenate([self._is_dtype_compat(c).codes for c in to_concat]) - cat = self._data._from_backing_data(codes) - return type(self)._simple_new(cat, name=name) + try: + codes = np.concatenate([self._is_dtype_compat(c).codes for c in to_concat]) + except TypeError: + # not all to_concat elements are among our categories (or NA) + from pandas.core.dtypes.concat import concat_compat + + res = concat_compat(to_concat) + return Index(res, name=name) + else: + cat = self._data._from_backing_data(codes) + return type(self)._simple_new(cat, name=name) def _delegate_method(self, name: str, *args, **kwargs): """ method delegation to the ._values """ diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index dacd802b21e63..46846209f315b 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1684,10 +1684,6 @@ def unique(self, level=None): level = self._get_level_number(level) return self._get_level_values(level=level, unique=True) - def _to_safe_for_reshape(self): - """ convert to object if we are a categorical """ - return self.set_levels([i._to_safe_for_reshape() for i in self.levels]) - def to_frame(self, index=True, name=None): """ Create a DataFrame with the levels of the MultiIndex as 
columns. diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 22887cede51ed..40496a5b8671b 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -268,19 +268,13 @@ def _add_margins( margin_dummy = DataFrame(row_margin, columns=[key]).T row_names = result.index.names - try: - # check the result column and leave floats - for dtype in set(result.dtypes): - cols = result.select_dtypes([dtype]).columns - margin_dummy[cols] = margin_dummy[cols].apply( - maybe_downcast_to_dtype, args=(dtype,) - ) - result = result.append(margin_dummy) - except TypeError: - - # we cannot reshape, so coerce the axis - result.index = result.index._to_safe_for_reshape() - result = result.append(margin_dummy) + # check the result column and leave floats + for dtype in set(result.dtypes): + cols = result.select_dtypes([dtype]).columns + margin_dummy[cols] = margin_dummy[cols].apply( + maybe_downcast_to_dtype, args=(dtype,) + ) + result = result.append(margin_dummy) result.index.names = row_names return result @@ -328,16 +322,7 @@ def _all_key(key): # we are going to mutate this, so need to copy! piece = piece.copy() - try: - piece[all_key] = margin[key] - except ValueError: - # we cannot reshape, so coerce the axis - piece.set_axis( - piece._get_axis(cat_axis)._to_safe_for_reshape(), - axis=cat_axis, - inplace=True, - ) - piece[all_key] = margin[key] + piece[all_key] = margin[key] table_pieces.append(piece) margin_keys.append(all_key) diff --git a/pandas/tests/indexes/categorical/test_category.py b/pandas/tests/indexes/categorical/test_category.py index 2e03c00638a5c..3bab57e1d265e 100644 --- a/pandas/tests/indexes/categorical/test_category.py +++ b/pandas/tests/indexes/categorical/test_category.py @@ -57,10 +57,10 @@ def test_append(self): expected = CategoricalIndex(list("aabbcaca"), categories=categories) tm.assert_index_equal(result, expected, exact=True) - # invalid objects - msg = "cannot append a non-category item to a CategoricalIndex" - with pytest.raises(TypeError, match=msg): - ci.append(Index(["a", "d"])) + # invalid objects -> cast to object via concat_compat + result = ci.append(Index(["a", "d"])) + expected = Index(["a", "a", "b", "b", "c", "a", "a", "d"]) + tm.assert_index_equal(result, expected, exact=True) # GH14298 - if base object is not categorical -> coerce to object result = Index(["c", "a"]).append(ci) diff --git a/pandas/tests/indexing/test_categorical.py b/pandas/tests/indexing/test_categorical.py index 6fff706e27cd2..1b9b6452b2e33 100644 --- a/pandas/tests/indexing/test_categorical.py +++ b/pandas/tests/indexing/test_categorical.py @@ -57,9 +57,12 @@ def test_loc_scalar(self): with pytest.raises(KeyError, match=r"^'d'$"): df.loc["d"] - msg = "cannot append a non-category item to a CategoricalIndex" - with pytest.raises(TypeError, match=msg): - df.loc["d"] = 10 + df2 = df.copy() + expected = df2.copy() + expected.index = expected.index.astype(object) + expected.loc["d"] = 10 + df2.loc["d"] = 10 + tm.assert_frame_equal(df2, expected) msg = "'fill_value=d' is not present in this Categorical's categories" with pytest.raises(TypeError, match=msg): diff --git a/pandas/tests/reshape/concat/test_categorical.py b/pandas/tests/reshape/concat/test_categorical.py index 388575c5a3b86..6dae28003d3b6 100644 --- a/pandas/tests/reshape/concat/test_categorical.py +++ b/pandas/tests/reshape/concat/test_categorical.py @@ -1,5 +1,4 @@ import numpy as np -import pytest from pandas.core.dtypes.dtypes import CategoricalDtype @@ -137,13 +136,18 @@ def 
test_categorical_index_preserver(self): ).set_index("B") tm.assert_frame_equal(result, expected) - # wrong categories + # wrong categories -> uses concat_compat, which casts to object df3 = DataFrame( {"A": a, "B": Categorical(b, categories=list("abe"))} ).set_index("B") - msg = "categories must match existing categories when appending" - with pytest.raises(TypeError, match=msg): - pd.concat([df2, df3]) + result = pd.concat([df2, df3]) + expected = pd.concat( + [ + df2.set_axis(df2.index.astype(object), 0), + df3.set_axis(df3.index.astype(object), 0), + ] + ) + tm.assert_frame_equal(result, expected) def test_concat_categorical_tz(self): # GH-23816 From 8eca4b76bd3748c201a680a7b66f7dcd84b49315 Mon Sep 17 00:00:00 2001 From: mlondschien <61679398+mlondschien@users.noreply.github.com> Date: Sun, 29 Nov 2020 17:06:28 +0100 Subject: [PATCH 03/15] BUG: fix astype conversion string -> float (#37974) --- doc/source/whatsnew/v1.2.0.rst | 1 + pandas/conftest.py | 15 ++++++++++++++- pandas/core/arrays/string_.py | 16 +++++++++++++++- pandas/tests/arrays/string_/test_string.py | 9 +++++++++ 4 files changed, 39 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 501e2878ab135..7c07601352bce 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -589,6 +589,7 @@ Conversion ^^^^^^^^^^ - Bug in :meth:`DataFrame.to_dict` with ``orient='records'`` now returns python native datetime objects for datetimelike columns (:issue:`21256`) +- Bug in :meth:`Series.astype` conversion from ``string`` to ``float`` raised in presence of ``pd.NA`` values (:issue:`37626`) - Strings diff --git a/pandas/conftest.py b/pandas/conftest.py index a0ec6f96042fc..3d9d2ba04f31b 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -288,7 +288,6 @@ def unique_nulls_fixture(request): # Generate cartesian product of unique_nulls_fixture: unique_nulls_fixture2 = unique_nulls_fixture - # ---------------------------------------------------------------- # Classes # ---------------------------------------------------------------- @@ -1091,6 +1090,20 @@ def float_ea_dtype(request): return request.param +@pytest.fixture(params=tm.FLOAT_DTYPES + tm.FLOAT_EA_DTYPES) +def any_float_allowed_nullable_dtype(request): + """ + Parameterized fixture for float dtypes. 
+ + * float + * 'float32' + * 'float64' + * 'Float32' + * 'Float64' + """ + return request.param + + @pytest.fixture(params=tm.COMPLEX_DTYPES) def complex_dtype(request): """ diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index e75305e55348c..cc2013deb5252 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -18,7 +18,8 @@ from pandas.core import ops from pandas.core.array_algos import masked_reductions -from pandas.core.arrays import IntegerArray, PandasArray +from pandas.core.arrays import FloatingArray, IntegerArray, PandasArray +from pandas.core.arrays.floating import FloatingDtype from pandas.core.arrays.integer import _IntegerDtype from pandas.core.construction import extract_array from pandas.core.indexers import check_array_indexer @@ -294,6 +295,19 @@ def astype(self, dtype, copy=True): arr[mask] = 0 values = arr.astype(dtype.numpy_dtype) return IntegerArray(values, mask, copy=False) + elif isinstance(dtype, FloatingDtype): + arr = self.copy() + mask = self.isna() + arr[mask] = "0" + values = arr.astype(dtype.numpy_dtype) + return FloatingArray(values, mask, copy=False) + elif np.issubdtype(dtype, np.floating): + arr = self._ndarray.copy() + mask = self.isna() + arr[mask] = 0 + values = arr.astype(dtype) + values[mask] = np.nan + return values return super().astype(dtype, copy) diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py index 9a1634380aaba..e35a632734779 100644 --- a/pandas/tests/arrays/string_/test_string.py +++ b/pandas/tests/arrays/string_/test_string.py @@ -366,6 +366,15 @@ def test_astype_int(dtype, request): tm.assert_extension_array_equal(result, expected) +def test_astype_float(any_float_allowed_nullable_dtype): + # Don't compare arrays (37974) + ser = pd.Series(["1.1", pd.NA, "3.3"], dtype="string") + + result = ser.astype(any_float_allowed_nullable_dtype) + expected = pd.Series([1.1, np.nan, 3.3], dtype=any_float_allowed_nullable_dtype) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("skipna", [True, False]) @pytest.mark.xfail(reason="Not implemented StringArray.sum") def test_reduce(skipna, dtype): From 7b400b3428c138e9f324a33e92027d41857bbeb1 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sun, 29 Nov 2020 17:10:18 +0100 Subject: [PATCH 04/15] ENH: add use_nullable_dtypes option in read_parquet (#31242) --- doc/source/whatsnew/v1.2.0.rst | 4 +++ pandas/io/parquet.py | 61 ++++++++++++++++++++++++++++++--- pandas/tests/io/test_parquet.py | 37 ++++++++++++++++++++ 3 files changed, 98 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 7c07601352bce..c9347b88f2072 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -241,6 +241,10 @@ Other enhancements - Calling a binary-input NumPy ufunc on multiple ``DataFrame`` objects now aligns, matching the behavior of binary operations and ufuncs on ``Series`` (:issue:`23743`). 
- Where possible :meth:`RangeIndex.difference` and :meth:`RangeIndex.symmetric_difference` will return :class:`RangeIndex` instead of :class:`Int64Index` (:issue:`36564`) - :meth:`DataFrame.to_parquet` now supports :class:`MultiIndex` for columns in parquet format (:issue:`34777`) +- :func:`read_parquet` gained a ``use_nullable_dtypes=True`` option to use + nullable dtypes that use ``pd.NA`` as missing value indicator where possible + for the resulting DataFrame (default is False, and only applicable for + ``engine="pyarrow"``) (:issue:`31242`) - Added :meth:`.Rolling.sem` and :meth:`Expanding.sem` to compute the standard error of the mean (:issue:`26476`) - :meth:`.Rolling.var` and :meth:`.Rolling.std` use Kahan summation and Welford's Method to avoid numerical issues (:issue:`37051`) - :meth:`DataFrame.corr` and :meth:`DataFrame.cov` use Welford's Method to avoid numerical issues (:issue:`37448`) diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py index a19b132a7891d..8b1184df92eaf 100644 --- a/pandas/io/parquet.py +++ b/pandas/io/parquet.py @@ -1,5 +1,6 @@ """ parquet compat """ +from distutils.version import LooseVersion import io import os from typing import Any, AnyStr, Dict, List, Optional, Tuple @@ -177,10 +178,39 @@ def write( handles.close() def read( - self, path, columns=None, storage_options: StorageOptions = None, **kwargs + self, + path, + columns=None, + use_nullable_dtypes=False, + storage_options: StorageOptions = None, + **kwargs, ): kwargs["use_pandas_metadata"] = True + to_pandas_kwargs = {} + if use_nullable_dtypes: + if LooseVersion(self.api.__version__) >= "0.16": + import pandas as pd + + mapping = { + self.api.int8(): pd.Int8Dtype(), + self.api.int16(): pd.Int16Dtype(), + self.api.int32(): pd.Int32Dtype(), + self.api.int64(): pd.Int64Dtype(), + self.api.uint8(): pd.UInt8Dtype(), + self.api.uint16(): pd.UInt16Dtype(), + self.api.uint32(): pd.UInt32Dtype(), + self.api.uint64(): pd.UInt64Dtype(), + self.api.bool_(): pd.BooleanDtype(), + self.api.string(): pd.StringDtype(), + } + to_pandas_kwargs["types_mapper"] = mapping.get + else: + raise ValueError( + "'use_nullable_dtypes=True' is only supported for pyarrow >= 0.16 " + f"({self.api.__version__} is installed" + ) + path_or_handle, handles, kwargs["filesystem"] = _get_path_or_handle( path, kwargs.pop("filesystem", None), @@ -190,7 +220,7 @@ def read( try: return self.api.parquet.read_table( path_or_handle, columns=columns, **kwargs - ).to_pandas() + ).to_pandas(**to_pandas_kwargs) finally: if handles is not None: handles.close() @@ -258,6 +288,12 @@ def write( def read( self, path, columns=None, storage_options: StorageOptions = None, **kwargs ): + use_nullable_dtypes = kwargs.pop("use_nullable_dtypes", False) + if use_nullable_dtypes: + raise ValueError( + "The 'use_nullable_dtypes' argument is not supported for the " + "fastparquet engine" + ) path = stringify_path(path) parquet_kwargs = {} handles = None @@ -368,7 +404,13 @@ def to_parquet( return None -def read_parquet(path, engine: str = "auto", columns=None, **kwargs): +def read_parquet( + path, + engine: str = "auto", + columns=None, + use_nullable_dtypes: bool = False, + **kwargs, +): """ Load a parquet object from the file path, returning a DataFrame. @@ -397,6 +439,15 @@ def read_parquet(path, engine: str = "auto", columns=None, **kwargs): 'pyarrow' is unavailable. columns : list, default=None If not None, only these columns will be read from the file. 
+ use_nullable_dtypes : bool, default False + If True, use dtypes that use ``pd.NA`` as missing value indicator + for the resulting DataFrame (only applicable for ``engine="pyarrow"``). + As new dtypes are added that support ``pd.NA`` in the future, the + output with this option will change to use those dtypes. + Note: this is an experimental option, and behaviour (e.g. additional + support dtypes) may change without notice. + + .. versionadded:: 1.2.0 **kwargs Any additional kwargs are passed to the engine. @@ -405,4 +456,6 @@ def read_parquet(path, engine: str = "auto", columns=None, **kwargs): DataFrame """ impl = get_engine(engine) - return impl.read(path, columns=columns, **kwargs) + return impl.read( + path, columns=columns, use_nullable_dtypes=use_nullable_dtypes, **kwargs + ) diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 3b83eed69c723..7e1d7fb17c8ed 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -828,6 +828,35 @@ def test_additional_extension_types(self, pa): ) check_round_trip(df, pa) + @td.skip_if_no("pyarrow", min_version="0.16") + def test_use_nullable_dtypes(self, pa): + import pyarrow.parquet as pq + + table = pyarrow.table( + { + "a": pyarrow.array([1, 2, 3, None], "int64"), + "b": pyarrow.array([1, 2, 3, None], "uint8"), + "c": pyarrow.array(["a", "b", "c", None]), + "d": pyarrow.array([True, False, True, None]), + } + ) + with tm.ensure_clean() as path: + # write manually with pyarrow to write integers + pq.write_table(table, path) + result1 = read_parquet(path) + result2 = read_parquet(path, use_nullable_dtypes=True) + + assert result1["a"].dtype == np.dtype("float64") + expected = pd.DataFrame( + { + "a": pd.array([1, 2, 3, None], dtype="Int64"), + "b": pd.array([1, 2, 3, None], dtype="UInt8"), + "c": pd.array(["a", "b", "c", None], dtype="string"), + "d": pd.array([True, False, True, None], dtype="boolean"), + } + ) + tm.assert_frame_equal(result2, expected) + @td.skip_if_no("pyarrow", min_version="0.14") def test_timestamp_nanoseconds(self, pa): # with version 2.0, pyarrow defaults to writing the nanoseconds, so @@ -1001,3 +1030,11 @@ def test_timezone_aware_index(self, fp, timezone_aware_date_list): expected = df.copy() expected.index.name = "index" check_round_trip(df, fp, expected=expected) + + def test_use_nullable_dtypes_not_supported(self, fp): + df = pd.DataFrame({"a": [1, 2]}) + + with tm.ensure_clean() as path: + df.to_parquet(path) + with pytest.raises(ValueError, match="not supported for the fastparquet"): + read_parquet(path, engine="fastparquet", use_nullable_dtypes=True) From e99e5ab32c4e831e7bbac0346189f4d6d86a6225 Mon Sep 17 00:00:00 2001 From: patrick <61934744+phofl@users.noreply.github.com> Date: Sun, 29 Nov 2020 18:21:52 +0100 Subject: [PATCH 05/15] BUG: Fix duplicates in intersection of multiindexes (#36927) --- doc/source/whatsnew/v1.1.5.rst | 1 + pandas/core/indexes/base.py | 9 +++++--- pandas/core/indexes/multi.py | 8 +++++-- pandas/core/ops/__init__.py | 5 +++- pandas/core/reshape/merge.py | 9 ++++++-- .../tests/indexes/base_class/test_setops.py | 2 +- pandas/tests/indexes/multi/test_setops.py | 23 +++++++++++++++++++ pandas/tests/indexes/test_setops.py | 10 ++++++++ pandas/tests/reshape/merge/test_merge.py | 2 +- 9 files changed, 59 insertions(+), 10 deletions(-) diff --git a/doc/source/whatsnew/v1.1.5.rst b/doc/source/whatsnew/v1.1.5.rst index 46c4ad4f35fe4..edc2f7327abfc 100644 --- a/doc/source/whatsnew/v1.1.5.rst +++ b/doc/source/whatsnew/v1.1.5.rst @@ -23,6 +23,7 
@@ Fixed regressions - Fixed regression in :meth:`DataFrame.groupby` aggregation with out-of-bounds datetime objects in an object-dtype column (:issue:`36003`) - Fixed regression in ``df.groupby(..).rolling(..)`` with the resulting :class:`MultiIndex` when grouping by a label that is in the index (:issue:`37641`) - Fixed regression in :meth:`DataFrame.fillna` not filling ``NaN`` after other operations such as :meth:`DataFrame.pivot` (:issue:`36495`). +- Fixed regression in :meth:`MultiIndex.intersection` returning duplicates when at least one of the indexes had duplicates (:issue:`36915`) .. --------------------------------------------------------------------------- diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index c86652acbcd0f..3f89b0619e600 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2822,7 +2822,7 @@ def intersection(self, other, sort=False): self._assert_can_do_setop(other) other = ensure_index(other) - if self.equals(other): + if self.equals(other) and not self.has_duplicates: return self._get_reconciled_name_object(other) if not is_dtype_equal(self.dtype, other.dtype): @@ -2847,7 +2847,7 @@ def _intersection(self, other, sort=False): except TypeError: pass else: - return result + return algos.unique1d(result) try: indexer = Index(rvals).get_indexer(lvals) @@ -2858,11 +2858,14 @@ def _intersection(self, other, sort=False): indexer = algos.unique1d(Index(rvals).get_indexer_non_unique(lvals)[0]) indexer = indexer[indexer != -1] - result = other.take(indexer)._values + result = other.take(indexer).unique()._values if sort is None: result = algos.safe_sort(result) + # Intersection has to be unique + assert algos.unique(result).shape == result.shape + return result def difference(self, other, sort=None): diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 46846209f315b..589da4a6c4ceb 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -3601,6 +3601,8 @@ def intersection(self, other, sort=False): other, result_names = self._convert_can_do_setop(other) if self.equals(other): + if self.has_duplicates: + return self.unique().rename(result_names) return self.rename(result_names) if not is_object_dtype(other.dtype): @@ -3619,10 +3621,12 @@ def intersection(self, other, sort=False): uniq_tuples = None # flag whether _inner_indexer was successful if self.is_monotonic and other.is_monotonic: try: - uniq_tuples = self._inner_indexer(lvals, rvals)[0] - sort = False # uniq_tuples is already sorted + inner_tuples = self._inner_indexer(lvals, rvals)[0] + sort = False # inner_tuples is already sorted except TypeError: pass + else: + uniq_tuples = algos.unique(inner_tuples) if uniq_tuples is None: other_uniq = set(rvals) diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index 2b159c607b0a0..d8b5dba424cbf 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -311,7 +311,10 @@ def should_reindex_frame_op( # TODO: any other cases we should handle here? cols = left.columns.intersection(right.columns) - if len(cols) and not (cols.equals(left.columns) and cols.equals(right.columns)): + # Intersection is always unique so we have to check the unique columns + left_uniques = left.columns.unique() + right_uniques = right.columns.unique() + if len(cols) and not (cols.equals(left_uniques) and cols.equals(right_uniques)): # TODO: is there a shortcut available when len(cols) == 0? 
return True diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 3b755c40721fb..9bb1add309407 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -1271,7 +1271,9 @@ def _validate_specification(self): raise MergeError("Must pass left_on or left_index=True") else: # use the common columns - common_cols = self.left.columns.intersection(self.right.columns) + left_cols = self.left.columns + right_cols = self.right.columns + common_cols = left_cols.intersection(right_cols) if len(common_cols) == 0: raise MergeError( "No common columns to perform merge on. " @@ -1280,7 +1282,10 @@ def _validate_specification(self): f"left_index={self.left_index}, " f"right_index={self.right_index}" ) - if not common_cols.is_unique: + if ( + not left_cols.join(common_cols, how="inner").is_unique + or not right_cols.join(common_cols, how="inner").is_unique + ): raise MergeError(f"Data columns not unique: {repr(common_cols)}") self.left_on = self.right_on = common_cols elif self.on is not None: diff --git a/pandas/tests/indexes/base_class/test_setops.py b/pandas/tests/indexes/base_class/test_setops.py index 6413b110dff2e..ddcb3c5b87ebc 100644 --- a/pandas/tests/indexes/base_class/test_setops.py +++ b/pandas/tests/indexes/base_class/test_setops.py @@ -141,7 +141,7 @@ def test_intersection_str_dates(self, sort): @pytest.mark.parametrize( "index2,expected_arr", - [(Index(["B", "D"]), ["B"]), (Index(["B", "D", "A"]), ["A", "B", "A"])], + [(Index(["B", "D"]), ["B"]), (Index(["B", "D", "A"]), ["A", "B"])], ) def test_intersection_non_monotonic_non_unique(self, index2, expected_arr, sort): # non-monotonic non-unique diff --git a/pandas/tests/indexes/multi/test_setops.py b/pandas/tests/indexes/multi/test_setops.py index 4ac9a27069a3f..2ac57f1befd57 100644 --- a/pandas/tests/indexes/multi/test_setops.py +++ b/pandas/tests/indexes/multi/test_setops.py @@ -378,3 +378,26 @@ def test_setops_disallow_true(method): with pytest.raises(ValueError, match="The 'sort' keyword only takes"): getattr(idx1, method)(idx2, sort=True) + + +@pytest.mark.parametrize( + ("tuples", "exp_tuples"), + [ + ([("val1", "test1")], [("val1", "test1")]), + ([("val1", "test1"), ("val1", "test1")], [("val1", "test1")]), + ( + [("val2", "test2"), ("val1", "test1")], + [("val2", "test2"), ("val1", "test1")], + ), + ], +) +def test_intersect_with_duplicates(tuples, exp_tuples): + # GH#36915 + left = MultiIndex.from_tuples(tuples, names=["first", "second"]) + right = MultiIndex.from_tuples( + [("val1", "test1"), ("val1", "test1"), ("val2", "test2")], + names=["first", "second"], + ) + result = left.intersection(right) + expected = MultiIndex.from_tuples(exp_tuples, names=["first", "second"]) + tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/test_setops.py b/pandas/tests/indexes/test_setops.py index 0973cef7cfdc1..2675c4569a8e9 100644 --- a/pandas/tests/indexes/test_setops.py +++ b/pandas/tests/indexes/test_setops.py @@ -120,6 +120,16 @@ def test_dunder_inplace_setops_deprecated(index): index ^= index +@pytest.mark.parametrize("values", [[1, 2, 2, 3], [3, 3]]) +def test_intersection_duplicates(values): + # GH#31326 + a = pd.Index(values) + b = pd.Index([3, 3]) + result = a.intersection(b) + expected = pd.Index([3]) + tm.assert_index_equal(result, expected) + + class TestSetOps: # Set operation tests shared by all indexes in the `index` fixture @pytest.mark.parametrize("case", [0.5, "xxx"]) diff --git a/pandas/tests/reshape/merge/test_merge.py 
b/pandas/tests/reshape/merge/test_merge.py index f44909b61ff7a..40ba62a27aa68 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -753,7 +753,7 @@ def test_overlapping_columns_error_message(self): # #2649, #10639 df2.columns = ["key1", "foo", "foo"] - msg = r"Data columns not unique: Index\(\['foo', 'foo'\], dtype='object'\)" + msg = r"Data columns not unique: Index\(\['foo'\], dtype='object'\)" with pytest.raises(MergeError, match=msg): merge(df, df2) From 7070aae40b7bef2ee65ee0725ad9979b11a54704 Mon Sep 17 00:00:00 2001 From: Yuanhao Geng <41546976+GYHHAHA@users.noreply.github.com> Date: Mon, 30 Nov 2020 01:45:40 +0800 Subject: [PATCH 06/15] BUG: merge_ordered fails with list-like left_by or right_by (#38089) --- doc/source/whatsnew/v1.2.0.rst | 1 + pandas/core/reshape/merge.py | 4 +- .../tests/reshape/merge/test_merge_ordered.py | 62 +++++++++++++++++++ 3 files changed, 64 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index c9347b88f2072..fe9c067b4d2ea 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -754,6 +754,7 @@ Reshaping - Bug in :meth:`DataFrame.apply` not setting index of return value when ``func`` return type is ``dict`` (:issue:`37544`) - Bug in :func:`concat` resulting in a ``ValueError`` when at least one of both inputs had a non-unique index (:issue:`36263`) - Bug in :meth:`DataFrame.merge` and :meth:`pandas.merge` returning inconsistent ordering in result for ``how=right`` and ``how=left`` (:issue:`35382`) +- Bug in :func:`merge_ordered` couldn't handle list-like ``left_by`` or ``right_by`` (:issue:`35269`) Sparse ^^^^^^ diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 9bb1add309407..545117dd84f93 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -140,9 +140,7 @@ def _groupby_and_merge(by, on, left: "DataFrame", right: "DataFrame", merge_piec # make sure join keys are in the merged # TODO, should merge_pieces do this? 
- for k in by: - if k in merged: - merged[k] = key + merged[by] = key pieces.append(merged) diff --git a/pandas/tests/reshape/merge/test_merge_ordered.py b/pandas/tests/reshape/merge/test_merge_ordered.py index 17f2f44f45fce..8389a6bb9be10 100644 --- a/pandas/tests/reshape/merge/test_merge_ordered.py +++ b/pandas/tests/reshape/merge/test_merge_ordered.py @@ -115,3 +115,65 @@ def test_doc_example(self): ) tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "left, right, on, left_by, right_by, expected", + [ + ( + DataFrame({"G": ["g", "g"], "H": ["h", "h"], "T": [1, 3]}), + DataFrame({"T": [2], "E": [1]}), + ["T"], + ["G", "H"], + None, + DataFrame( + { + "G": ["g"] * 3, + "H": ["h"] * 3, + "T": [1, 2, 3], + "E": [np.nan, 1.0, np.nan], + } + ), + ), + ( + DataFrame({"G": ["g", "g"], "H": ["h", "h"], "T": [1, 3]}), + DataFrame({"T": [2], "E": [1]}), + "T", + ["G", "H"], + None, + DataFrame( + { + "G": ["g"] * 3, + "H": ["h"] * 3, + "T": [1, 2, 3], + "E": [np.nan, 1.0, np.nan], + } + ), + ), + ( + DataFrame({"T": [2], "E": [1]}), + DataFrame({"G": ["g", "g"], "H": ["h", "h"], "T": [1, 3]}), + ["T"], + None, + ["G", "H"], + DataFrame( + { + "T": [1, 2, 3], + "E": [np.nan, 1.0, np.nan], + "G": ["g"] * 3, + "H": ["h"] * 3, + } + ), + ), + ], + ) + def test_list_type_by(self, left, right, on, left_by, right_by, expected): + # GH 35269 + result = merge_ordered( + left=left, + right=right, + on=on, + left_by=left_by, + right_by=right_by, + ) + + tm.assert_frame_equal(result, expected) From eaa45cf7b1135dd5c9d1fe93717594566e55ecc9 Mon Sep 17 00:00:00 2001 From: Shao Yang Hong Date: Mon, 30 Nov 2020 02:06:23 +0800 Subject: [PATCH 07/15] DOC: Add behavior for Index argument in DataFrame.loc (#38109) --- pandas/core/indexing.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 6aa031af64833..f6cf691ea911c 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -259,10 +259,11 @@ def loc(self) -> "_LocIndexer": e.g. ``[True, False, True]``. - An alignable boolean Series. The index of the key will be aligned before masking. + - An alignable Index. The Index of the returned selection will be the input. - A ``callable`` function with one argument (the calling Series or DataFrame) and that returns valid output for indexing (one of the above) - See more at :ref:`Selection by Label ` + See more at :ref:`Selection by Label `. 
Raises ------ @@ -332,6 +333,14 @@ def loc(self) -> "_LocIndexer": max_speed shield sidewinder 7 8 + Index (same behavior as ``df.reindex``) + + >>> df.loc[pd.Index(["cobra", "viper"], name="foo")] + max_speed shield + foo + cobra 1 2 + viper 4 5 + Conditional that returns a boolean Series >>> df.loc[df['shield'] > 6] From 224d2e88b349f3e8fc56104a4d28ddb5b08dfdd6 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 29 Nov 2020 10:15:12 -0800 Subject: [PATCH 08/15] REF: de-duplicate ndarray[datetimelike] wrapping (#38129) --- pandas/core/arrays/interval.py | 12 +++++++----- pandas/core/construction.py | 18 ++++++++++++++++++ pandas/core/dtypes/concat.py | 22 +++++----------------- pandas/core/ops/array_ops.py | 34 +++++----------------------------- 4 files changed, 35 insertions(+), 51 deletions(-) diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index efb66c9a47a97..757cea2c710b2 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -44,7 +44,11 @@ from pandas.core.arrays.base import ExtensionArray, _extension_array_shared_docs from pandas.core.arrays.categorical import Categorical import pandas.core.common as com -from pandas.core.construction import array, extract_array +from pandas.core.construction import ( + array, + ensure_wrapped_if_datetimelike, + extract_array, +) from pandas.core.indexers import check_array_indexer from pandas.core.indexes.base import ensure_index from pandas.core.ops import invalid_comparison, unpack_zerodim_and_defer @@ -251,11 +255,9 @@ def _simple_new( raise ValueError(msg) # For dt64/td64 we want DatetimeArray/TimedeltaArray instead of ndarray - from pandas.core.ops.array_ops import maybe_upcast_datetimelike_array - - left = maybe_upcast_datetimelike_array(left) + left = ensure_wrapped_if_datetimelike(left) left = extract_array(left, extract_numpy=True) - right = maybe_upcast_datetimelike_array(right) + right = ensure_wrapped_if_datetimelike(right) right = extract_array(right, extract_numpy=True) lbase = getattr(left, "_ndarray", left).base diff --git a/pandas/core/construction.py b/pandas/core/construction.py index f9ebe3f1e185e..96cf1be7520fb 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -402,6 +402,24 @@ def extract_array(obj: object, extract_numpy: bool = False) -> Union[Any, ArrayL return obj +def ensure_wrapped_if_datetimelike(arr): + """ + Wrap datetime64 and timedelta64 ndarrays in DatetimeArray/TimedeltaArray. 
+ """ + if isinstance(arr, np.ndarray): + if arr.dtype.kind == "M": + from pandas.core.arrays import DatetimeArray + + return DatetimeArray._from_sequence(arr) + + elif arr.dtype.kind == "m": + from pandas.core.arrays import TimedeltaArray + + return TimedeltaArray._from_sequence(arr) + + return arr + + def sanitize_array( data, index: Optional[Index], diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index 63e3440558c75..a9355e30cd3c2 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -18,7 +18,7 @@ from pandas.core.arrays import ExtensionArray from pandas.core.arrays.sparse import SparseArray -from pandas.core.construction import array +from pandas.core.construction import array, ensure_wrapped_if_datetimelike def _get_dtype_kinds(arrays) -> Set[str]: @@ -360,12 +360,14 @@ def _concat_datetime(to_concat, axis=0): ------- a single array, preserving the combined dtypes """ - to_concat = [_wrap_datetimelike(x) for x in to_concat] + to_concat = [ensure_wrapped_if_datetimelike(x) for x in to_concat] + single_dtype = len({x.dtype for x in to_concat}) == 1 # multiple types, need to coerce to object if not single_dtype: - # wrap_datetimelike ensures that astype(object) wraps in Timestamp/Timedelta + # ensure_wrapped_if_datetimelike ensures that astype(object) wraps + # in Timestamp/Timedelta return _concatenate_2d([x.astype(object) for x in to_concat], axis=axis) if axis == 1: @@ -379,17 +381,3 @@ def _concat_datetime(to_concat, axis=0): assert result.shape[0] == 1 result = result[0] return result - - -def _wrap_datetimelike(arr): - """ - Wrap datetime64 and timedelta64 ndarrays in DatetimeArray/TimedeltaArray. - - DTA/TDA handle .astype(object) correctly. - """ - from pandas.core.construction import array as pd_array, extract_array - - arr = extract_array(arr, extract_numpy=True) - if isinstance(arr, np.ndarray) and arr.dtype.kind in ["m", "M"]: - arr = pd_array(arr) - return arr diff --git a/pandas/core/ops/array_ops.py b/pandas/core/ops/array_ops.py index c855687552e82..41d539564d91e 100644 --- a/pandas/core/ops/array_ops.py +++ b/pandas/core/ops/array_ops.py @@ -30,6 +30,7 @@ from pandas.core.dtypes.generic import ABCExtensionArray, ABCIndexClass, ABCSeries from pandas.core.dtypes.missing import isna, notna +from pandas.core.construction import ensure_wrapped_if_datetimelike from pandas.core.ops import missing from pandas.core.ops.dispatch import should_extension_dispatch from pandas.core.ops.invalid import invalid_comparison @@ -175,8 +176,8 @@ def arithmetic_op(left: ArrayLike, right: Any, op): # NB: We assume that extract_array has already been called # on `left` and `right`. 
- lvalues = maybe_upcast_datetimelike_array(left) - rvalues = maybe_upcast_datetimelike_array(right) + lvalues = ensure_wrapped_if_datetimelike(left) + rvalues = ensure_wrapped_if_datetimelike(right) rvalues = _maybe_upcast_for_op(rvalues, lvalues.shape) if should_extension_dispatch(lvalues, rvalues) or isinstance(rvalues, Timedelta): @@ -206,7 +207,7 @@ def comparison_op(left: ArrayLike, right: Any, op) -> ArrayLike: ndarray or ExtensionArray """ # NB: We assume extract_array has already been called on left and right - lvalues = maybe_upcast_datetimelike_array(left) + lvalues = ensure_wrapped_if_datetimelike(left) rvalues = right rvalues = lib.item_from_zerodim(rvalues) @@ -331,7 +332,7 @@ def fill_bool(x, left=None): right = construct_1d_object_array_from_listlike(right) # NB: We assume extract_array has already been called on left and right - lvalues = maybe_upcast_datetimelike_array(left) + lvalues = ensure_wrapped_if_datetimelike(left) rvalues = right if should_extension_dispatch(lvalues, rvalues): @@ -400,31 +401,6 @@ def get_array_op(op): raise NotImplementedError(op_name) -def maybe_upcast_datetimelike_array(obj: ArrayLike) -> ArrayLike: - """ - If we have an ndarray that is either datetime64 or timedelta64, wrap in EA. - - Parameters - ---------- - obj : ndarray or ExtensionArray - - Returns - ------- - ndarray or ExtensionArray - """ - if isinstance(obj, np.ndarray): - if obj.dtype.kind == "m": - from pandas.core.arrays import TimedeltaArray - - return TimedeltaArray._from_sequence(obj) - if obj.dtype.kind == "M": - from pandas.core.arrays import DatetimeArray - - return DatetimeArray._from_sequence(obj) - - return obj - - def _maybe_upcast_for_op(obj, shape: Shape): """ Cast non-pandas objects to pandas types to unify behavior of arithmetic From 1cb5f69c6f07808c0df9a96b5a5679a8308ffae9 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 29 Nov 2020 10:15:59 -0800 Subject: [PATCH 09/15] BUG: Index.intersection casting to object instead of numeric (#38122) --- doc/source/whatsnew/v1.2.0.rst | 1 + pandas/core/indexes/base.py | 6 +++-- pandas/core/indexes/multi.py | 10 +++---- pandas/tests/indexes/multi/test_setops.py | 20 +++++++++++++- pandas/tests/indexes/ranges/test_setops.py | 31 +++++++++++++++++++++- 5 files changed, 58 insertions(+), 10 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index fe9c067b4d2ea..3b1d1b4f241b5 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -782,6 +782,7 @@ Other - Fixed metadata propagation in the :class:`Series.dt`, :class:`Series.str` accessors, :class:`DataFrame.duplicated`, :class:`DataFrame.stack`, :class:`DataFrame.unstack`, :class:`DataFrame.pivot`, :class:`DataFrame.append`, :class:`DataFrame.diff`, :class:`DataFrame.applymap` and :class:`DataFrame.update` methods (:issue:`28283`, :issue:`37381`) - Fixed metadata propagation when selecting columns with ``DataFrame.__getitem__`` (:issue:`28283`) - Bug in :meth:`Index.union` behaving differently depending on whether operand is an :class:`Index` or other list-like (:issue:`36384`) +- Bug in :meth:`Index.intersection` with non-matching numeric dtypes casting to ``object`` dtype instead of minimal common dtype (:issue:`38122`) - Passing an array with 2 or more dimensions to the :class:`Series` constructor now raises the more specific ``ValueError`` rather than a bare ``Exception`` (:issue:`35744`) - Bug in ``dir`` where ``dir(obj)`` wouldn't show attributes defined on the instance for pandas objects (:issue:`37173`) 
- Bug in :meth:`RangeIndex.difference` returning :class:`Int64Index` in some cases where it should return :class:`RangeIndex` (:issue:`38028`) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 3f89b0619e600..09fe885e47754 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -33,6 +33,7 @@ from pandas.util._decorators import Appender, cache_readonly, doc from pandas.core.dtypes.cast import ( + find_common_type, maybe_cast_to_integer_array, validate_numeric_casting, ) @@ -2826,8 +2827,9 @@ def intersection(self, other, sort=False): return self._get_reconciled_name_object(other) if not is_dtype_equal(self.dtype, other.dtype): - this = self.astype("O") - other = other.astype("O") + dtype = find_common_type([self.dtype, other.dtype]) + this = self.astype(dtype) + other = other.astype(dtype) return this.intersection(other, sort=sort) result = self._intersection(other, sort=sort) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 589da4a6c4ceb..4aedf03ca1800 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -3717,16 +3717,14 @@ def _convert_can_do_setop(self, other): if not isinstance(other, Index): if len(other) == 0: - other = MultiIndex( - levels=[[]] * self.nlevels, - codes=[[]] * self.nlevels, - verify_integrity=False, - ) + return self[:0], self.names else: msg = "other must be a MultiIndex or a list of tuples" try: other = MultiIndex.from_tuples(other) - except TypeError as err: + except (ValueError, TypeError) as err: + # ValueError raised by tupels_to_object_array if we + # have non-object dtype raise TypeError(msg) from err else: result_names = get_unanimous_names(self, other) diff --git a/pandas/tests/indexes/multi/test_setops.py b/pandas/tests/indexes/multi/test_setops.py index 2ac57f1befd57..51538c556de15 100644 --- a/pandas/tests/indexes/multi/test_setops.py +++ b/pandas/tests/indexes/multi/test_setops.py @@ -2,7 +2,7 @@ import pytest import pandas as pd -from pandas import MultiIndex, Series +from pandas import Index, MultiIndex, Series import pandas._testing as tm @@ -294,6 +294,24 @@ def test_intersection(idx, sort): # assert result.equals(tuples) +def test_intersection_non_object(idx, sort): + other = Index(range(3), name="foo") + + result = idx.intersection(other, sort=sort) + expected = MultiIndex(levels=idx.levels, codes=[[]] * idx.nlevels, names=None) + tm.assert_index_equal(result, expected, exact=True) + + # if we pass a length-0 ndarray (i.e. 
no name, we retain our idx.name) + result = idx.intersection(np.asarray(other)[:0], sort=sort) + expected = MultiIndex(levels=idx.levels, codes=[[]] * idx.nlevels, names=idx.names) + tm.assert_index_equal(result, expected, exact=True) + + msg = "other must be a MultiIndex or a list of tuples" + with pytest.raises(TypeError, match=msg): + # With non-zero length non-index, we try and fail to convert to tuples + idx.intersection(np.asarray(other), sort=sort) + + def test_intersect_equal_sort(): # GH-24959 idx = pd.MultiIndex.from_product([[1, 0], ["a", "b"]]) diff --git a/pandas/tests/indexes/ranges/test_setops.py b/pandas/tests/indexes/ranges/test_setops.py index 1fd41b017221b..5623b0904c0d5 100644 --- a/pandas/tests/indexes/ranges/test_setops.py +++ b/pandas/tests/indexes/ranges/test_setops.py @@ -3,11 +3,40 @@ import numpy as np import pytest -from pandas import Index, Int64Index, RangeIndex +from pandas import Index, Int64Index, RangeIndex, UInt64Index import pandas._testing as tm class TestRangeIndexSetOps: + @pytest.mark.parametrize("klass", [RangeIndex, Int64Index, UInt64Index]) + def test_intersection_mismatched_dtype(self, klass): + # check that we cast to float, not object + index = RangeIndex(start=0, stop=20, step=2, name="foo") + index = klass(index) + + flt = index.astype(np.float64) + + # bc index.equals(flt), we go through fastpath and get RangeIndex back + result = index.intersection(flt) + tm.assert_index_equal(result, index, exact=True) + + result = flt.intersection(index) + tm.assert_index_equal(result, flt, exact=True) + + # neither empty, not-equals + result = index.intersection(flt[1:]) + tm.assert_index_equal(result, flt[1:], exact=True) + + result = flt[1:].intersection(index) + tm.assert_index_equal(result, flt[1:], exact=True) + + # empty other + result = index.intersection(flt[:0]) + tm.assert_index_equal(result, flt[:0], exact=True) + + result = flt[:0].intersection(index) + tm.assert_index_equal(result, flt[:0], exact=True) + def test_intersection(self, sort): # intersect with Int64Index index = RangeIndex(start=0, stop=20, step=2) From 59710bcd85ab8982da1bb26af0db7575a2c3565f Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 29 Nov 2020 10:23:46 -0800 Subject: [PATCH 10/15] CLN: remove unnecesary cast.maybe_convert_objects (#38144) --- pandas/core/dtypes/cast.py | 52 ------------------- pandas/core/groupby/generic.py | 6 +-- .../tests/dtypes/cast/test_convert_objects.py | 12 ----- 3 files changed, 3 insertions(+), 67 deletions(-) delete mode 100644 pandas/tests/dtypes/cast/test_convert_objects.py diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index fe40bc42887c4..27c5527536057 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -99,7 +99,6 @@ from pandas import Series from pandas.core.arrays import ExtensionArray from pandas.core.indexes.base import Index - from pandas.core.indexes.datetimes import DatetimeIndex _int8_max = np.iinfo(np.int8).max _int16_max = np.iinfo(np.int16).max @@ -1121,57 +1120,6 @@ def astype_nansafe( return arr.view(dtype) -def maybe_convert_objects( - values: np.ndarray, convert_numeric: bool = True -) -> Union[np.ndarray, "DatetimeIndex"]: - """ - If we have an object dtype array, try to coerce dates and/or numbers. 
- - Parameters - ---------- - values : ndarray - convert_numeric : bool, default True - - Returns - ------- - ndarray or DatetimeIndex - """ - validate_bool_kwarg(convert_numeric, "convert_numeric") - - orig_values = values - - # convert dates - if is_object_dtype(values.dtype): - values = lib.maybe_convert_objects(values, convert_datetime=True) - - # convert timedeltas - if is_object_dtype(values.dtype): - values = lib.maybe_convert_objects(values, convert_timedelta=True) - - # convert to numeric - if is_object_dtype(values.dtype): - if convert_numeric: - try: - new_values = lib.maybe_convert_numeric( - values, set(), coerce_numeric=True - ) - except (ValueError, TypeError): - pass - else: - # if we are all nans then leave me alone - if not isna(new_values).all(): - values = new_values - - else: - # soft-conversion - values = lib.maybe_convert_objects(values) - - if values is orig_values: - values = values.copy() - - return values - - def soft_convert_objects( values: np.ndarray, datetime: bool = True, diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 244c47cd1f1ea..b9226732d5a69 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -37,7 +37,6 @@ find_common_type, maybe_cast_result, maybe_cast_result_dtype, - maybe_convert_objects, maybe_downcast_numeric, ) from pandas.core.dtypes.common import ( @@ -1867,8 +1866,9 @@ def _recast_datetimelike_result(result: DataFrame) -> DataFrame: # See GH#26285 for n in obj_cols: - converted = maybe_convert_objects( - result.iloc[:, n].values, convert_numeric=False + values = result.iloc[:, n].values + converted = lib.maybe_convert_objects( + values, convert_datetime=True, convert_timedelta=True ) result.iloc[:, n] = converted diff --git a/pandas/tests/dtypes/cast/test_convert_objects.py b/pandas/tests/dtypes/cast/test_convert_objects.py deleted file mode 100644 index a28d554acd312..0000000000000 --- a/pandas/tests/dtypes/cast/test_convert_objects.py +++ /dev/null @@ -1,12 +0,0 @@ -import numpy as np -import pytest - -from pandas.core.dtypes.cast import maybe_convert_objects - - -@pytest.mark.parametrize("data", [[1, 2], ["apply", "banana"]]) -def test_maybe_convert_objects_copy(data): - arr = np.array(data) - out = maybe_convert_objects(arr) - - assert arr is not out From 47d0da67be1ab53c92863365a72a4936be281442 Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Sun, 29 Nov 2020 19:11:29 +0000 Subject: [PATCH 11/15] API: membership checks on ExtensionArray containing NA values (#37867) --- doc/source/whatsnew/v1.2.0.rst | 1 + pandas/core/arrays/base.py | 18 +++++++++++++++++ pandas/tests/extension/arrow/test_bool.py | 4 ++++ pandas/tests/extension/base/interface.py | 23 ++++++++++++++++++++++ pandas/tests/extension/decimal/array.py | 8 ++++++++ pandas/tests/extension/json/test_json.py | 7 +++++++ pandas/tests/extension/test_categorical.py | 22 +++++++++++++++++++++ 7 files changed, 83 insertions(+) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 3b1d1b4f241b5..873437d917515 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -770,6 +770,7 @@ ExtensionArray - Fixed bug when applying a NumPy ufunc with multiple outputs to an :class:`.IntegerArray` returning None (:issue:`36913`) - Fixed an inconsistency in :class:`.PeriodArray`'s ``__init__`` signature to those of :class:`.DatetimeArray` and :class:`.TimedeltaArray` (:issue:`37289`) - Reductions for :class:`.BooleanArray`, :class:`.Categorical`, :class:`.DatetimeArray`, 
:class:`.FloatingArray`, :class:`.IntegerArray`, :class:`.PeriodArray`, :class:`.TimedeltaArray`, and :class:`.PandasArray` are now keyword-only methods (:issue:`37541`) +- Fixed a bug where a ``TypeError`` was wrongly raised if a membership check was made on an ``ExtensionArray`` containing nan-like values (:issue:`37867`) Other ^^^^^ diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 448025e05422d..76b7877b0ac70 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -37,6 +37,7 @@ is_array_like, is_dtype_equal, is_list_like, + is_scalar, pandas_dtype, ) from pandas.core.dtypes.dtypes import ExtensionDtype @@ -354,6 +355,23 @@ def __iter__(self): for i in range(len(self)): yield self[i] + def __contains__(self, item) -> bool: + """ + Return for `item in self`. + """ + # GH37867 + # comparisons of any item to pd.NA always return pd.NA, so e.g. "a" in [pd.NA] + # would raise a TypeError. The implementation below works around that. + if is_scalar(item) and isna(item): + if not self._can_hold_na: + return False + elif item is self.dtype.na_value or isinstance(item, self.dtype.type): + return self.isna().any() + else: + return False + else: + return (item == self).any() + def __eq__(self, other: Any) -> ArrayLike: """ Return for `self == other` (element-wise equality). diff --git a/pandas/tests/extension/arrow/test_bool.py b/pandas/tests/extension/arrow/test_bool.py index 12426a0c92c55..922b3b94c16c1 100644 --- a/pandas/tests/extension/arrow/test_bool.py +++ b/pandas/tests/extension/arrow/test_bool.py @@ -50,6 +50,10 @@ def test_view(self, data): # __setitem__ does not work, so we only have a smoke-test data.view() + @pytest.mark.xfail(raises=AssertionError, reason="Not implemented yet") + def test_contains(self, data, data_missing, nulls_fixture): + super().test_contains(data, data_missing, nulls_fixture) + class TestConstructors(BaseArrowTests, base.BaseConstructorsTests): def test_from_dtype(self, data): diff --git a/pandas/tests/extension/base/interface.py b/pandas/tests/extension/base/interface.py index 9ae4b01508d79..d7997310dde3d 100644 --- a/pandas/tests/extension/base/interface.py +++ b/pandas/tests/extension/base/interface.py @@ -29,6 +29,29 @@ def test_can_hold_na_valid(self, data): # GH-20761 assert data._can_hold_na is True + def test_contains(self, data, data_missing, nulls_fixture): + # GH-37867 + # Tests for membership checks. Membership checks for nan-likes is tricky and + # the settled on rule is: `nan_like in arr` is True if nan_like is + # arr.dtype.na_value and arr.isna().any() is True. Else the check returns False. 
+
+        na_value = data.dtype.na_value
+        # ensure data without missing values
+        data = data[~data.isna()]
+
+        # first elements are non-missing
+        assert data[0] in data
+        assert data_missing[0] in data_missing
+
+        # check the presence of na_value
+        assert na_value in data_missing
+        assert na_value not in data
+
+        if nulls_fixture is not na_value:
+            # the data can never contain nan-likes other than na_value
+            assert nulls_fixture not in data
+            assert nulls_fixture not in data_missing
+
     def test_memory_usage(self, data):
         s = pd.Series(data)
         result = s.memory_usage(index=False)
diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py
index 9ede9c7fbd0fd..a713550dafa5c 100644
--- a/pandas/tests/extension/decimal/array.py
+++ b/pandas/tests/extension/decimal/array.py
@@ -155,6 +155,14 @@ def __setitem__(self, key, value):
     def __len__(self) -> int:
         return len(self._data)

+    def __contains__(self, item) -> bool:
+        if not isinstance(item, decimal.Decimal):
+            return False
+        elif item.is_nan():
+            return self.isna().any()
+        else:
+            return super().__contains__(item)
+
     @property
     def nbytes(self) -> int:
         n = len(self)
diff --git a/pandas/tests/extension/json/test_json.py b/pandas/tests/extension/json/test_json.py
index 74ca341e27bf8..3a5e49796c53b 100644
--- a/pandas/tests/extension/json/test_json.py
+++ b/pandas/tests/extension/json/test_json.py
@@ -143,6 +143,13 @@ def test_custom_asserts(self):
         with pytest.raises(AssertionError, match=msg):
             self.assert_frame_equal(a.to_frame(), b.to_frame())

+    @pytest.mark.xfail(
+        reason="comparison method not implemented for JSONArray (GH-37867)"
+    )
+    def test_contains(self, data):
+        # GH-37867
+        super().test_contains(data)
+

 class TestConstructors(BaseJSON, base.BaseConstructorsTests):
     @pytest.mark.skip(reason="not implemented constructor from dtype")
diff --git a/pandas/tests/extension/test_categorical.py b/pandas/tests/extension/test_categorical.py
index 95f338cbc3240..d03a9ab6b2588 100644
--- a/pandas/tests/extension/test_categorical.py
+++ b/pandas/tests/extension/test_categorical.py
@@ -87,6 +87,28 @@ def test_memory_usage(self, data):
         # Is this deliberate?
         super().test_memory_usage(data)

+    def test_contains(self, data, data_missing, nulls_fixture):
+        # GH-37867
+        # na value handling in Categorical.__contains__ is deprecated.
+        # See base.BaseInterfaceTests.test_contains for more details.
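+        # Unlike the base class test, the final assertion below checks that a
+        # Categorical can also report membership for nan-likes other than its
+        # na_value.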
+
+        na_value = data.dtype.na_value
+        # ensure data without missing values
+        data = data[~data.isna()]
+
+        # first elements are non-missing
+        assert data[0] in data
+        assert data_missing[0] in data_missing
+
+        # check the presence of na_value
+        assert na_value in data_missing
+        assert na_value not in data
+
+        # Categoricals can contain nan-likes other than na_value
+        if nulls_fixture is not na_value:
+            assert nulls_fixture not in data
+            assert nulls_fixture in data_missing  # this line differs from super method
+

 class TestConstructors(base.BaseConstructorsTests):
     pass

From 4a35f2d6ecd1bea4e064384c48346aaf245188ff Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche
Date: Sun, 29 Nov 2020 20:12:54 +0100
Subject: [PATCH 12/15] ENH: include conversion to nullable float in convert_dtypes() (#38117)

---
 pandas/core/dtypes/cast.py                    | 32 ++++++++++++++-
 pandas/core/generic.py                        | 41 ++++++++++++++-----
 pandas/core/series.py                         |  9 +++-
 .../series/methods/test_convert_dtypes.py     | 35 ++++++++++++----
 4 files changed, 96 insertions(+), 21 deletions(-)

diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
index 27c5527536057..3c4c811c94534 100644
--- a/pandas/core/dtypes/cast.py
+++ b/pandas/core/dtypes/cast.py
@@ -1196,6 +1196,7 @@ def convert_dtypes(
     convert_string: bool = True,
     convert_integer: bool = True,
     convert_boolean: bool = True,
+    convert_floating: bool = True,
 ) -> Dtype:
     """
     Convert objects to best possible type, and optionally,
@@ -1210,6 +1211,10 @@ def convert_dtypes(
         Whether, if possible, conversion can be done to integer extension types.
     convert_boolean : bool, defaults True
        Whether object dtypes should be converted to ``BooleanDtypes()``.
+    convert_floating : bool, defaults True
+        Whether, if possible, conversion can be done to floating extension types.
+        If `convert_integer` is also True, preference will be given to integer
+        dtypes if the floats can be faithfully cast to integers.

     Returns
     -------
     dtype
         new dtype
     """
     is_extension = is_extension_array_dtype(input_array.dtype)
-    if (convert_string or convert_integer or convert_boolean) and not is_extension:
+    if (
+        convert_string or convert_integer or convert_boolean or convert_floating
+    ) and not is_extension:
         try:
             inferred_dtype = lib.infer_dtype(input_array)
         except ValueError:
@@ -1245,6 +1252,29 @@ def convert_dtypes(
             if is_integer_dtype(inferred_dtype):
                 inferred_dtype = input_array.dtype

+        if convert_floating:
+            if not is_integer_dtype(input_array.dtype) and is_numeric_dtype(
+                input_array.dtype
+            ):
+                from pandas.core.arrays.floating import FLOAT_STR_TO_DTYPE
+
+                inferred_float_dtype = FLOAT_STR_TO_DTYPE.get(
+                    input_array.dtype.name, "Float64"
+                )
+                # if we could also convert to integer, check if all floats
+                # are actually integers
+                if convert_integer:
+                    arr = input_array[notna(input_array)]
+                    if (arr.astype(int) == arr).all():
+                        inferred_dtype = "Int64"
+                    else:
+                        inferred_dtype = inferred_float_dtype
+                else:
+                    inferred_dtype = inferred_float_dtype
+        else:
+            if is_float_dtype(inferred_dtype):
+                inferred_dtype = input_array.dtype
+
         if convert_boolean:
             if is_bool_dtype(input_array.dtype):
                 inferred_dtype = "boolean"
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index c7448cf8f8e40..c9f862d136477 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -6088,6 +6088,7 @@ def convert_dtypes(
         convert_string: bool_t = True,
         convert_integer: bool_t = True,
         convert_boolean: bool_t = True,
+        convert_floating: bool_t = True,
     ) -> FrameOrSeries:
         """
         Convert columns to best possible dtypes using dtypes supporting ``pd.NA``.
@@ -6104,6 +6105,12 @@ def convert_dtypes(
             Whether, if possible, conversion can be done to integer extension types.
         convert_boolean : bool, defaults True
             Whether object dtypes should be converted to ``BooleanDtypes()``.
+        convert_floating : bool, defaults True
+            Whether, if possible, conversion can be done to floating extension types.
+            If `convert_integer` is also True, preference will be given to integer
+            dtypes if the floats can be faithfully cast to integers.
+
+            .. versionadded:: 1.2.0

         Returns
         -------
@@ -6121,19 +6128,25 @@ def convert_dtypes(
         -----
         By default, ``convert_dtypes`` will attempt to convert a Series (or each
         Series in a DataFrame) to dtypes that support ``pd.NA``. By using the options
-        ``convert_string``, ``convert_integer``, and ``convert_boolean``, it is
-        possible to turn off individual conversions to ``StringDtype``, the integer
-        extension types or ``BooleanDtype``, respectively.
+        ``convert_string``, ``convert_integer``, ``convert_boolean`` and
+        ``convert_floating``, it is possible to turn off individual conversions
+        to ``StringDtype``, the integer extension types, ``BooleanDtype``
+        or floating extension types, respectively.

         For object-dtyped columns, if ``infer_objects`` is ``True``, use the inference
         rules as during normal Series/DataFrame construction.  Then, if possible,
-        convert to ``StringDtype``, ``BooleanDtype`` or an appropriate integer extension
-        type, otherwise leave as ``object``.
+        convert to ``StringDtype``, ``BooleanDtype`` or an appropriate integer
+        or floating extension type, otherwise leave as ``object``.

         If the dtype is integer, convert to an appropriate integer extension type.

         If the dtype is numeric, and consists of all integers, convert to an
-        appropriate integer extension type.
+        appropriate integer extension type. Otherwise, convert to an
+        appropriate floating extension type.
+
+        .. versionchanged:: 1.2
+            Starting with pandas 1.2, this method also converts float columns
+            to the nullable floating extension type.

         In the future, as new dtypes are added that support ``pd.NA``, the results
         of this method will change to support those new dtypes.
@@ -6173,7 +6186,7 @@ def convert_dtypes(
         >>> dfn = df.convert_dtypes()
         >>> dfn
            a  b      c  d      e      f
-        0  1  x   True  h     10    NaN
+        0  1  x   True  h     10   <NA>
         1  2  y  False  i   <NA>  100.5
         2  3  z   <NA>  <NA>   20  200.0
@@ -6183,7 +6196,7 @@ def convert_dtypes(
         c    boolean
         d     string
         e      Int64
-        f    float64
+        f    Float64
         dtype: object

         Start with a Series of strings and missing data represented by ``np.nan``.
@@ -6205,12 +6218,20 @@ def convert_dtypes(
         """
         if self.ndim == 1:
             return self._convert_dtypes(
-                infer_objects, convert_string, convert_integer, convert_boolean
+                infer_objects,
+                convert_string,
+                convert_integer,
+                convert_boolean,
+                convert_floating,
             )
         else:
             results = [
                 col._convert_dtypes(
-                    infer_objects, convert_string, convert_integer, convert_boolean
+                    infer_objects,
+                    convert_string,
+                    convert_integer,
+                    convert_boolean,
+                    convert_floating,
                 )
                 for col_name, col in self.items()
             ]
diff --git a/pandas/core/series.py b/pandas/core/series.py
index d493ac0a8c051..1f4221206e5bc 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -4706,6 +4706,7 @@ def _convert_dtypes(
         convert_string: bool = True,
         convert_integer: bool = True,
         convert_boolean: bool = True,
+        convert_floating: bool = True,
     ) -> "Series":
         input_series = self
         if infer_objects:
@@ -4713,9 +4714,13 @@ def _convert_dtypes(
             if is_object_dtype(input_series):
                 input_series = input_series.copy()

-        if convert_string or convert_integer or convert_boolean:
+        if convert_string or convert_integer or convert_boolean or convert_floating:
             inferred_dtype = convert_dtypes(
-                input_series._values, convert_string, convert_integer, convert_boolean
+                input_series._values,
+                convert_string,
+                convert_integer,
+                convert_boolean,
+                convert_floating,
             )
             try:
                 result = input_series.astype(inferred_dtype)
diff --git a/pandas/tests/series/methods/test_convert_dtypes.py b/pandas/tests/series/methods/test_convert_dtypes.py
index d44667b258414..920182a99e9ef 100644
--- a/pandas/tests/series/methods/test_convert_dtypes.py
+++ b/pandas/tests/series/methods/test_convert_dtypes.py
@@ -58,9 +58,17 @@
         [10, np.nan, 20],
         np.dtype("float"),
         "Int64",
-        {("convert_integer", False): np.dtype("float")},
+        {
+            ("convert_integer", False, "convert_floating", True): "Float64",
+            ("convert_integer", False, "convert_floating", False): np.dtype("float"),
+        },
+    ),
+    (
+        [np.nan, 100.5, 200],
+        np.dtype("float"),
+        "Float64",
+        {("convert_floating", False): np.dtype("float")},
     ),
-    ([np.nan, 100.5, 200], np.dtype("float"), np.dtype("float"), {}),
     (
         [3, 4, 5],
         "Int8",
@@ -85,20 +93,30 @@
         "Int8",
         {("convert_integer", False): np.dtype("i1")},
     ),
+    (
+        [1.2, 1.3],
+        np.dtype("float32"),
+        "Float32",
+        {("convert_floating", False): np.dtype("float32")},
+    ),
     (
         [1, 2.0],
         object,
         "Int64",
         {
-            ("convert_integer", False): np.dtype("float"),
+            ("convert_integer", False): "Float64",
+            ("convert_integer", False, "convert_floating", False): np.dtype("float"),
             ("infer_objects", False): np.dtype("object"),
         },
     ),
     (
         [1, 2.5],
         object,
-        np.dtype("float"),
-        {("infer_objects", False): np.dtype("object")},
+        "Float64",
+        {
+            ("convert_floating", False): np.dtype("float"),
+            ("infer_objects", False): np.dtype("object"),
+        },
     ),
     (["a", "b"], pd.CategoricalDtype(), pd.CategoricalDtype(), {}),
     (
@@ -134,7 +152,7 @@ class TestSeriesConvertDtypes:
         "data, maindtype, expected_default, expected_other",
         test_cases,
     )
-    @pytest.mark.parametrize("params", product(*[(True, False)] * 4))
+    @pytest.mark.parametrize("params", product(*[(True, False)] * 5))
     def test_convert_dtypes(
         self, data, maindtype, params, expected_default, expected_other
     ):
@@ -150,12 +168,13 @@ def test_convert_dtypes(
             "convert_string",
             "convert_integer",
             "convert_boolean",
+            "convert_floating",
         ]
         params_dict = dict(zip(param_names, params))

         expected_dtype = expected_default
-        for (key, val), dtype in expected_other.items():
-            if params_dict[key] is val:
+        for spec, dtype in expected_other.items():
+            if all(params_dict[key] is val for key, val in zip(spec[::2], spec[1::2])):
                 expected_dtype = dtype

         expected = pd.Series(data, dtype=expected_dtype)

From f65f0d3edb275faf37435ba1fa2780240b105b48 Mon Sep 17 00:00:00 2001
From: jbrockmendel
Date: Sun, 29 Nov 2020 11:18:39 -0800
Subject: [PATCH 13/15] DEPR: ExtensionOpsMixin -> OpsMixin (#38142)

---
 doc/source/whatsnew/v1.2.0.rst                |  1 +
 pandas/core/arrays/base.py                    | 16 +++++++
 pandas/tests/arrays/test_deprecations.py      | 19 ++++++++
 pandas/tests/extension/decimal/array.py       | 44 ++++++++++++++++---
 .../tests/extension/decimal/test_decimal.py   |  7 +--
 5 files changed, 76 insertions(+), 11 deletions(-)
 create mode 100644 pandas/tests/arrays/test_deprecations.py

diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst
index 873437d917515..bb06bcc9b5aa8 100644
--- a/doc/source/whatsnew/v1.2.0.rst
+++ b/doc/source/whatsnew/v1.2.0.rst
@@ -492,6 +492,7 @@ Deprecations
 - Deprecated :meth:`Index.asi8` for :class:`Index` subclasses other than :class:`.DatetimeIndex`, :class:`.TimedeltaIndex`, and :class:`PeriodIndex` (:issue:`37877`)
 - The ``inplace`` parameter of :meth:`Categorical.remove_unused_categories` is deprecated and will be removed in a future version (:issue:`37643`)
 - The ``null_counts`` parameter of :meth:`DataFrame.info` is deprecated and replaced by ``show_counts``. It will be removed in a future version (:issue:`37999`)
+- :class:`ExtensionOpsMixin` and :class:`ExtensionScalarOpsMixin` are deprecated and will be removed in a future version. Use ``pd.core.arraylike.OpsMixin`` instead (:issue:`37080`)

 .. ---------------------------------------------------------------------------

diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
index 76b7877b0ac70..e3469bba23ccd 100644
--- a/pandas/core/arrays/base.py
+++ b/pandas/core/arrays/base.py
@@ -21,6 +21,7 @@
     Union,
     cast,
 )
+import warnings

 import numpy as np

@@ -1237,6 +1238,21 @@ class ExtensionOpsMixin:
     with NumPy arrays.
     """

+    def __init_subclass__(cls, **kwargs):
+        # We use __init_subclass__ to handle deprecations
+        super().__init_subclass__(**kwargs)
+
+        if cls.__name__ != "ExtensionScalarOpsMixin":
+            # We only want to warn for user-defined subclasses,
+            # and cannot reference ExtensionScalarOpsMixin directly at this point.
+            warnings.warn(
+                "ExtensionOpsMixin and ExtensionScalarOpsMixin are deprecated "
+                "and will be removed in a future version. Use "
+                "pd.core.arraylike.OpsMixin instead.",
+                FutureWarning,
+                stacklevel=2,
+            )
+
     @classmethod
     def _create_arithmetic_method(cls, op):
         raise AbstractMethodError(cls)
diff --git a/pandas/tests/arrays/test_deprecations.py b/pandas/tests/arrays/test_deprecations.py
new file mode 100644
index 0000000000000..7e80072e8794f
--- /dev/null
+++ b/pandas/tests/arrays/test_deprecations.py
@@ -0,0 +1,19 @@
+import pandas._testing as tm
+from pandas.core.arrays import (
+    ExtensionArray,
+    ExtensionOpsMixin,
+    ExtensionScalarOpsMixin,
+)
+
+
+def test_extension_ops_mixin_deprecated():
+    # GH#37080 deprecated in favor of OpsMixin
+    with tm.assert_produces_warning(FutureWarning):
+
+        class MySubclass(ExtensionOpsMixin, ExtensionArray):
+            pass
+
+    with tm.assert_produces_warning(FutureWarning):
+
+        class MyOtherSubclass(ExtensionScalarOpsMixin, ExtensionArray):
+            pass
diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py
index a713550dafa5c..d7bdca4b218b5 100644
--- a/pandas/tests/extension/decimal/array.py
+++ b/pandas/tests/extension/decimal/array.py
@@ -7,12 +7,13 @@
 import numpy as np

 from pandas.core.dtypes.base import ExtensionDtype
+from pandas.core.dtypes.cast import maybe_cast_to_extension_array
 from pandas.core.dtypes.common import is_dtype_equal, is_list_like, pandas_dtype

 import pandas as pd
 from pandas.api.extensions import no_default, register_extension_dtype
 from pandas.core.arraylike import OpsMixin
-from pandas.core.arrays import ExtensionArray, ExtensionScalarOpsMixin
+from pandas.core.arrays import ExtensionArray
 from pandas.core.indexers import check_array_indexer


@@ -45,7 +46,7 @@ def _is_numeric(self) -> bool:
         return True


-class DecimalArray(OpsMixin, ExtensionScalarOpsMixin, ExtensionArray):
+class DecimalArray(OpsMixin, ExtensionArray):
     __array_priority__ = 1000

     def __init__(self, values, dtype=None, copy=False, context=None):
@@ -225,6 +226,42 @@ def convert_values(param):

         return np.asarray(res, dtype=bool)

+    _do_coerce = True  # overridden in DecimalArrayWithoutCoercion
+
+    def _arith_method(self, other, op):
+        def convert_values(param):
+            if isinstance(param, ExtensionArray) or is_list_like(param):
+                ovalues = param
+            else:  # Assume it's an object
+                ovalues = [param] * len(self)
+            return ovalues
+
+        lvalues = self
+        rvalues = convert_values(other)
+
+        # If the operator is not defined for the underlying objects,
+        # a TypeError should be raised
+        res = [op(a, b) for (a, b) in zip(lvalues, rvalues)]
+
+        def _maybe_convert(arr):
+            if self._do_coerce:
+                # https://github.com/pandas-dev/pandas/issues/22850
+                # We catch all regular exceptions here, and fall back
+                # to an ndarray.
+                res = maybe_cast_to_extension_array(type(self), arr)
+                if not isinstance(res, type(self)):
+                    # exception raised in _from_sequence; ensure we have ndarray
+                    res = np.asarray(arr)
+            else:
+                res = np.asarray(arr)
+            return res
+
+        if op.__name__ in {"divmod", "rdivmod"}:
+            a, b = zip(*res)
+            return _maybe_convert(a), _maybe_convert(b)
+
+        return _maybe_convert(res)
+

 def to_decimal(values, context=None):
     return DecimalArray([decimal.Decimal(x) for x in values], context=context)


 def make_data():
     return [decimal.Decimal(random.random()) for _ in range(100)]
-
-
-DecimalArray._add_arithmetic_ops()
diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py
index 233b658d29782..c3e84f75ebe68 100644
--- a/pandas/tests/extension/decimal/test_decimal.py
+++ b/pandas/tests/extension/decimal/test_decimal.py
@@ -335,12 +335,7 @@ def _from_sequence(cls, scalars, dtype=None, copy=False):


 class DecimalArrayWithoutCoercion(DecimalArrayWithoutFromSequence):
-    @classmethod
-    def _create_arithmetic_method(cls, op):
-        return cls._create_method(op, coerce_to_dtype=False)
-
-
-DecimalArrayWithoutCoercion._add_arithmetic_ops()
+    _do_coerce = False


 def test_combine_from_sequence_raises():

From d98b37dd5604844e7ce9eeb4d7e1db7efd86c07e Mon Sep 17 00:00:00 2001
From: jbrockmendel
Date: Sun, 29 Nov 2020 11:25:08 -0800
Subject: [PATCH 14/15] REF: use np.where instead of maybe_upcast_putmask in nanops (#38130)

---
 pandas/core/nanops.py | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py
index 80c4cd5b44a92..88662a4fabed8 100644
--- a/pandas/core/nanops.py
+++ b/pandas/core/nanops.py
@@ -12,7 +12,6 @@
 from pandas._typing import ArrayLike, Dtype, DtypeObj, F, Scalar
 from pandas.compat._optional import import_optional_dependency

-from pandas.core.dtypes.cast import maybe_upcast_putmask
 from pandas.core.dtypes.common import (
     get_dtype,
     is_any_int_dtype,
@@ -284,7 +283,7 @@ def _get_values(
     """
     # _get_values is only called from within nanops, and in all cases
    #  with scalar fill_value.  This guarantee is important for the
-    #  maybe_upcast_putmask call below
+    #  np.where call below
     assert is_scalar(fill_value)

     values = extract_array(values, extract_numpy=True)
@@ -292,10 +291,12 @@ def _get_values(

     dtype = values.dtype

+    datetimelike = False
     if needs_i8_conversion(values.dtype):
         # changing timedelta64/datetime64 to int64 needs to happen after
         # finding `mask` above
         values = np.asarray(values.view("i8"))
+        datetimelike = True

     dtype_ok = _na_ok_dtype(dtype)

@@ -306,13 +307,13 @@ def _get_values(
     )

     if skipna and (mask is not None) and (fill_value is not None):
-        values = values.copy()
-        if dtype_ok and mask.any():
-            np.putmask(values, mask, fill_value)
-
-        # promote if needed
-        else:
-            values, _ = maybe_upcast_putmask(values, mask, fill_value)
+        if mask.any():
+            if dtype_ok or datetimelike:
+                values = values.copy()
+                np.putmask(values, mask, fill_value)
+            else:
+                # np.where will promote if needed
+                values = np.where(~mask, values, fill_value)

     # return a platform independent precision dtype
     dtype_max = dtype

From f04ec866120f134321fd63f0f0d170a8b8c98591 Mon Sep 17 00:00:00 2001
From: jbrockmendel
Date: Sun, 29 Nov 2020 11:37:00 -0800
Subject: [PATCH 15/15] CLN: remove unreachable in maybe_cast_result (#38152)

---
 pandas/core/dtypes/cast.py | 29 ++++++++++++++++-------------
 1 file changed, 16 insertions(+), 13 deletions(-)

diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
index 3c4c811c94534..08e7671e0b674 100644
--- a/pandas/core/dtypes/cast.py
+++ b/pandas/core/dtypes/cast.py
@@ -296,7 +296,9 @@ def trans(x):
     return result


-def maybe_cast_result(result, obj: "Series", numeric_only: bool = False, how: str = ""):
+def maybe_cast_result(
+    result: ArrayLike, obj: "Series", numeric_only: bool = False, how: str = ""
+) -> ArrayLike:
     """
     Try casting result to a different type if appropriate

@@ -319,19 +321,20 @@ def maybe_cast_result(result, obj: "Series", numeric_only: bool = False, how: st
     dtype = obj.dtype
     dtype = maybe_cast_result_dtype(dtype, how)

-    if not is_scalar(result):
-        if (
-            is_extension_array_dtype(dtype)
-            and not is_categorical_dtype(dtype)
-            and dtype.kind != "M"
-        ):
-            # We have to special case categorical so as not to upcast
-            # things like counts back to categorical
-            cls = dtype.construct_array_type()
-            result = maybe_cast_to_extension_array(cls, result, dtype=dtype)
+    assert not is_scalar(result)
+
+    if (
+        is_extension_array_dtype(dtype)
+        and not is_categorical_dtype(dtype)
+        and dtype.kind != "M"
+    ):
+        # We have to special case categorical so as not to upcast
+        # things like counts back to categorical
+        cls = dtype.construct_array_type()
+        result = maybe_cast_to_extension_array(cls, result, dtype=dtype)

-        elif numeric_only and is_numeric_dtype(dtype) or not numeric_only:
-            result = maybe_downcast_to_dtype(result, dtype)
+    elif numeric_only and is_numeric_dtype(dtype) or not numeric_only:
+        result = maybe_downcast_to_dtype(result, dtype)

     return result
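
For reference, a short doctest-style illustration of the combined behavior that
PATCH 11/15 (#37867) and PATCH 12/15 (#38117) above aim for. This is a sketch, not
part of the patch series: the outputs assume a pandas build with these patches
applied, and ``ser`` and ``arr`` are hypothetical names.

>>> import numpy as np
>>> import pandas as pd
>>> ser = pd.Series([10.0, np.nan, 20.0])
>>> ser.convert_dtypes().dtype  # non-missing floats cast faithfully to int
Int64Dtype()
>>> ser.convert_dtypes(convert_integer=False).dtype
Float64Dtype()
>>> pd.Series([np.nan, 100.5, 200]).convert_dtypes().dtype
Float64Dtype()
>>> arr = pd.array([1, 2, pd.NA], dtype="Int64")
>>> pd.NA in arr  # pd.NA is Int64's na_value and arr has a missing entry
True
>>> np.nan in arr  # np.nan is not Int64's na_value
False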