diff --git a/xarray/core/indexes.py b/xarray/core/indexes.py
index 2242e57e482..33e974fec50 100644
--- a/xarray/core/indexes.py
+++ b/xarray/core/indexes.py
@@ -26,6 +26,7 @@
     is_allowed_extension_array_dtype,
     is_dict_like,
     is_scalar,
+    is_valid_numpy_dtype,
 )
 
 if TYPE_CHECKING:
@@ -610,7 +611,7 @@ def _asarray_tuplesafe(values):
 
 def _is_nested_tuple(possible_tuple):
     return isinstance(possible_tuple, tuple) and any(
-        isinstance(value, tuple | list | slice) for value in possible_tuple
+        isinstance(value, list | slice) for value in possible_tuple
     )
 
 
@@ -784,8 +785,12 @@ def concat(
             indexes_coord_dtypes = {idx.coord_dtype for idx in indexes}
             if len(indexes_coord_dtypes) == 1:
                 coord_dtype = next(iter(indexes_coord_dtypes))
-            else:
+            # Check if all dtypes are valid numpy dtypes before using np.result_type
+            # (e.g., pandas StringDtype is not a valid numpy dtype, GH#11317)
+            elif all(is_valid_numpy_dtype(dt) for dt in indexes_coord_dtypes):
                 coord_dtype = np.result_type(*indexes_coord_dtypes)
+            else:
+                coord_dtype = np.dtype("O")
 
         return cls(new_pd_index, dim=dim, coord_dtype=coord_dtype)
 
@@ -914,7 +919,12 @@ def join(
             index = self.index.intersection(other.index)
         if is_allowed_extension_array_dtype(index.dtype):
             return type(self)(index, self.dim)
-        coord_dtype = np.result_type(self.coord_dtype, other.coord_dtype)
+        if is_valid_numpy_dtype(self.coord_dtype) and is_valid_numpy_dtype(
+            other.coord_dtype
+        ):
+            coord_dtype = np.result_type(self.coord_dtype, other.coord_dtype)
+        else:
+            coord_dtype = np.dtype("O")
         return type(self)(index, self.dim, coord_dtype=coord_dtype)
 
     def reindex_like(
diff --git a/xarray/tests/test_concat.py b/xarray/tests/test_concat.py
index bc98d72d50c..4ed0315dfca 100644
--- a/xarray/tests/test_concat.py
+++ b/xarray/tests/test_concat.py
@@ -1811,3 +1811,13 @@ def test_concat_different_dims_in_different_child(self):
         actual = concat([dt1, dt2], dim="x")
         expected = DataTree.from_dict(coords={"/first/x": [1, 3], "/second/x": [2, 4]})
         assert actual.identical(expected)
+
+
+def test_concat_string_dtype_from_pd_index():
+    # Regression test for GH#11317: concat fails due to StringDtype introduced by pd.Index
+    da = DataArray([0], dims=["dim_a"], coords=dict(dim_a=["a"]))
+    db = DataArray([0])
+    db2 = concat([db], pd.Index(["b"], name="dim_a"))
+    result = concat([da, db2], dim="dim_a")
+    assert result.sizes["dim_a"] == 2
+    assert list(result.coords["dim_a"].values) == ["a", "b"]
diff --git a/xarray/tests/test_indexes.py b/xarray/tests/test_indexes.py
index 94adcc3b935..0dbdaba5043 100644
--- a/xarray/tests/test_indexes.py
+++ b/xarray/tests/test_indexes.py
@@ -509,6 +509,28 @@ def test_sel(self) -> None:
         with pytest.raises(IndexError):
             index.sel({"x": (slice(None), 1, "no_level")})
 
+    def test_sel_nested_tuple_key(self) -> None:
+        """Test that tuple-valued MultiIndex levels can be selected with a single key.
+
+        Regression test for GH#11341: when a MultiIndex level contains tuples,
+        selecting with a nested tuple key ((1, 1), 2) should collapse the dimension
+        just like selecting with a non-nested tuple key (1, 2).
+        """
+        # Create a MultiIndex where the first level contains tuples
+        nested_level_0 = pd.Index(
+            [(1, 1), (1, 1), (2, 2), (3, 3)], name="a", tupleize_cols=False
+        )
+        nested_level_1 = pd.Index([1, 2, 10, 20], name="b")
+        nested_mi = pd.MultiIndex.from_arrays([nested_level_0, nested_level_1])
+
+        index = PandasMultiIndex(nested_mi, "index")
+
+        # Select with a nested tuple key - should return scalar indexer
+        actual = index.sel({"index": ((1, 1), 2)})
+        # pandas.get_loc returns an integer for exact match
+        expected_dim_indexers = {"index": 1}
+        assert actual.dim_indexers == expected_dim_indexers
+
     def test_join(self):
         midx = pd.MultiIndex.from_product([["a", "aa"], [1, 2]], names=("one", "two"))
         level_coords_dtype = {"one": "=U2", "two": "i"}