From 238f7c8b4d535d0f712d4318c8aa43b01dbdde0d Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 23 Jun 2026 14:03:55 -0700 Subject: [PATCH] BUG: DataFrame.__setitem__ silently dropping columns of a 2D value (GH#46544) Setting an existing column label with a multi-column 2D array silently kept only the first column. iset now raises, matching the new-column and DataFrame.loc paths. Co-Authored-By: Claude Opus 4.8 (1M context) --- doc/source/whatsnew/v3.1.0.rst | 1 + pandas/core/internals/managers.py | 7 +++++ pandas/tests/frame/indexing/test_setitem.py | 33 +++++++++++++++++++++ 3 files changed, 41 insertions(+) diff --git a/doc/source/whatsnew/v3.1.0.rst b/doc/source/whatsnew/v3.1.0.rst index 10f24f01e7688..c7190249e8b4e 100644 --- a/doc/source/whatsnew/v3.1.0.rst +++ b/doc/source/whatsnew/v3.1.0.rst @@ -311,6 +311,7 @@ Indexing - Bug in :meth:`Index.get_indexer` where ``method="pad"``, ``"backfill"``, or ``"nearest"`` returned incorrect results when the target contained ``NaT`` or ``NaN`` instead of ``-1`` (:issue:`32572`) - Bugs in setitem-with-expansion when adding new rows failing to keep the original dtype in some cases (:issue:`32346`, :issue:`15231`, :issue:`47503`, :issue:`6485`, :issue:`25383`, :issue:`52235`, :issue:`17026`, :issue:`56010`) - Bug in :meth:`DataFrame.__getitem__` raising ``InvalidIndexError`` when indexing with a tuple containing a ``slice`` on a :class:`DataFrame` with :class:`MultiIndex` columns (e.g., ``df[:, "t1"]``) (:issue:`26511`) +- Bug in :meth:`DataFrame.__setitem__` silently keeping only the first column when assigning a 2D array with more than one column to an existing column label, instead of raising as the new-column and :meth:`DataFrame.loc` cases already did (:issue:`46544`) - Bug in :meth:`DataFrame.at` raising ``TypeError`` when accessing a :class:`MultiIndex` with a partial date string on a :class:`DatetimeIndex` level (:issue:`43395`) - Bug in :meth:`DataFrame.duplicated` returning an empty :class:`Series` without the DataFrame's index when the
DataFrame had no columns (:issue:`61191`) - Bug in :meth:`DataFrame.iloc` setitem raising ``AttributeError`` when assigning a :class:`Series` or :class:`Index` with a nullable EA dtype (e.g. ``Int64``, ``Float64``, ``boolean``) into a column with a NumPy dtype (:issue:`47776`) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 3faf9ee718f9c..81226af4f2cb8 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1315,6 +1315,13 @@ def iset( # Check if we can use _iset_single fastpath loc = cast("int", loc) + if not value_is_extension_type and len(value) > 1: + # GH#46544 setting a single column with a 2D value; matches the + # check in self.insert. value has been transposed above, so + # len(value) is the number of columns in the original value. + raise ValueError( + f"Expected a 1D array, got an array with shape {value.T.shape}" + ) blkno = self.blknos[loc] blk = self.blocks[blkno] if len(blk._mgr_locs) == 1: # TODO: fastest way to check this? diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index 91e38e7cd72bf..00772af616d30 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -29,6 +29,7 @@ PeriodIndex, Series, Timestamp, + concat, cut, date_range, notna, @@ -740,6 +741,38 @@ def test_setitem_npmatrix_2d(self): tm.assert_frame_equal(df, expected) + @pytest.mark.parametrize("cols", [3, 1]) + def test_setitem_2d_existing_column_raises(self, cols): + # GH#46544 setting an existing column with a multi-column 2D array used + # to silently keep only the first column; it should raise like the + # new-column and .loc cases do. cols=1 exercises the single-column + # Block fastpath, cols=3 the consolidated-block path. 
+ df = DataFrame(np.zeros((4, cols))) + msg = r"Expected a 1D array, got an array with shape \(4, 2\)" + with pytest.raises(ValueError, match=msg): + df[0] = np.arange(8).reshape(4, 2) + + def test_setitem_2d_single_column_ok(self): + # GH#46544 an (N, 1) array is still a single column and is allowed + df = DataFrame(np.zeros((4, 2))) + df[0] = np.arange(4).reshape(4, 1) + expected = DataFrame({0: np.arange(4), 1: np.zeros(4)}) + tm.assert_frame_equal(df, expected) + + def test_setitem_2d_duplicate_columns_ok(self): + # GH#46544 when the key maps to multiple columns, a matching 2D value + # fills them and must keep working + df = DataFrame(np.zeros((4, 3)), columns=[0, 0, 1]) + df[0] = np.arange(8).reshape(4, 2) + expected = concat( + [ + DataFrame(np.arange(8).reshape(4, 2), columns=[0, 0]), + DataFrame(np.zeros((4, 1)), columns=[1]), + ], + axis=1, + ) + tm.assert_frame_equal(df, expected) + @pytest.mark.parametrize("vals", [{}, {"d": "a"}]) def test_setitem_aligning_dict_with_index(self, vals): # GH#47216