Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 13 additions & 3 deletions xarray/core/indexes.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
is_allowed_extension_array_dtype,
is_dict_like,
is_scalar,
is_valid_numpy_dtype,
)

if TYPE_CHECKING:
Expand Down Expand Up @@ -610,7 +611,7 @@ def _asarray_tuplesafe(values):

def _is_nested_tuple(possible_tuple):
return isinstance(possible_tuple, tuple) and any(
isinstance(value, tuple | list | slice) for value in possible_tuple
isinstance(value, list | slice) for value in possible_tuple
)


Expand Down Expand Up @@ -784,8 +785,12 @@ def concat(
indexes_coord_dtypes = {idx.coord_dtype for idx in indexes}
if len(indexes_coord_dtypes) == 1:
coord_dtype = next(iter(indexes_coord_dtypes))
else:
# Check if all dtypes are valid numpy dtypes before using np.result_type
# (e.g., pandas StringDtype is not a valid numpy dtype, GH#11317)
elif all(is_valid_numpy_dtype(dt) for dt in indexes_coord_dtypes):
coord_dtype = np.result_type(*indexes_coord_dtypes)
else:
coord_dtype = np.dtype("O")

return cls(new_pd_index, dim=dim, coord_dtype=coord_dtype)

Expand Down Expand Up @@ -914,7 +919,12 @@ def join(
index = self.index.intersection(other.index)
if is_allowed_extension_array_dtype(index.dtype):
return type(self)(index, self.dim)
coord_dtype = np.result_type(self.coord_dtype, other.coord_dtype)
if is_valid_numpy_dtype(self.coord_dtype) and is_valid_numpy_dtype(
other.coord_dtype
):
coord_dtype = np.result_type(self.coord_dtype, other.coord_dtype)
else:
coord_dtype = np.dtype("O")
return type(self)(index, self.dim, coord_dtype=coord_dtype)

def reindex_like(
Expand Down
10 changes: 10 additions & 0 deletions xarray/tests/test_concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -1811,3 +1811,13 @@ def test_concat_different_dims_in_different_child(self):
actual = concat([dt1, dt2], dim="x")
expected = DataTree.from_dict(coords={"/first/x": [1, 3], "/second/x": [2, 4]})
assert actual.identical(expected)


def test_concat_string_dtype_from_pd_index():
    """Regression test for GH#11317.

    concat used to fail because of the StringDtype introduced by a
    ``pd.Index`` passed as the concatenation dimension.
    """
    # One array whose "dim_a" coordinate comes from a plain Python list ...
    from_list_coord = DataArray([0], dims=["dim_a"], coords={"dim_a": ["a"]})

    # ... and one whose "dim_a" coordinate is created by concatenating along
    # a named pandas Index.
    string_index = pd.Index(["b"], name="dim_a")
    from_pd_index = concat([DataArray([0])], string_index)

    combined = concat([from_list_coord, from_pd_index], dim="dim_a")

    assert combined.sizes["dim_a"] == 2
    assert list(combined.coords["dim_a"].values) == ["a", "b"]
22 changes: 22 additions & 0 deletions xarray/tests/test_indexes.py
Original file line number Diff line number Diff line change
Expand Up @@ -509,6 +509,28 @@ def test_sel(self) -> None:
with pytest.raises(IndexError):
index.sel({"x": (slice(None), 1, "no_level")})

def test_sel_nested_tuple_key(self) -> None:
    """Check that a key containing a tuple label selects a single entry.

    Regression test for GH#11341: when a MultiIndex level contains tuples,
    selecting with a nested tuple key ``((1, 1), 2)`` should collapse the
    dimension just like selecting with a non-nested tuple key ``(1, 2)``.
    """
    # Level "a" holds tuple labels; tupleize_cols=False asks pandas not to
    # build a MultiIndex out of those tuples.
    level_a = pd.Index(
        [(1, 1), (1, 1), (2, 2), (3, 3)], name="a", tupleize_cols=False
    )
    level_b = pd.Index([1, 2, 10, 20], name="b")
    multi_index = pd.MultiIndex.from_arrays([level_a, level_b])

    wrapped = PandasMultiIndex(multi_index, "index")

    # ((1, 1), 2) matches exactly one row, the second one (position 1),
    # so the resulting indexer for "index" is the plain integer 1.
    result = wrapped.sel({"index": ((1, 1), 2)})
    assert result.dim_indexers == {"index": 1}

def test_join(self):
midx = pd.MultiIndex.from_product([["a", "aa"], [1, 2]], names=("one", "two"))
level_coords_dtype = {"one": "=U2", "two": "i"}
Expand Down
Loading