diff --git a/doc/source/whatsnew/v3.1.0.rst b/doc/source/whatsnew/v3.1.0.rst index 4ac823e188692..e64873c3f9662 100644 --- a/doc/source/whatsnew/v3.1.0.rst +++ b/doc/source/whatsnew/v3.1.0.rst @@ -302,7 +302,7 @@ Strings Interval ^^^^^^^^ - Bug in :class:`IntervalArray` and :class:`IntervalIndex` constructors unnecessarily upcasting sub-64-bit numeric dtypes (e.g. ``float32``, ``int32``) to 64-bit (:issue:`45412`) -- +- Bug in :func:`cut` and other operations building an :class:`IntervalIndex` engine raising ``TypeError`` on 32-bit platforms when there were more than 100 intervals (:issue:`44075`, :issue:`23440`) Indexing ^^^^^^^^ diff --git a/pandas/_libs/intervaltree.pxi.in b/pandas/_libs/intervaltree.pxi.in index b94f60c272e5d..aa68ef1d198f5 100644 --- a/pandas/_libs/intervaltree.pxi.in +++ b/pandas/_libs/intervaltree.pxi.in @@ -193,7 +193,10 @@ cdef class IntervalTree(IntervalMixin): cdef take(ndarray source, ndarray indices): """Take the given positions from a 1D ndarray """ - return PyArray_Take(source, indices, 0) + # GH#23440, GH#44075: the positions we build are int64, but PyArray_Take + # requires intp indices and rejects the int64->int32 "safe" cast on 32-bit + # platforms. On 64-bit intp is int64, so this is a no-op there. + return PyArray_Take(source, indices.astype(np.intp, copy=False), 0) cdef sort_values_and_indices(all_values, all_indices, subset): diff --git a/pandas/tests/indexes/interval/test_interval_tree.py b/pandas/tests/indexes/interval/test_interval_tree.py index 4389b7c06ede9..e56ae4144cd5e 100644 --- a/pandas/tests/indexes/interval/test_interval_tree.py +++ b/pandas/tests/indexes/interval/test_interval_tree.py @@ -4,24 +4,11 @@ import pytest from pandas._libs.interval import IntervalTree -from pandas.compat import ( - IS64, - WASM, -) import pandas._testing as tm -def skipif_32bit(param): - """ - Skip parameters in a parametrize on 32bit systems. Specifically used - here to skip leaf_size parameters related to GH 23440. - """ - marks = pytest.mark.skipif(not IS64, reason="GH 23440: int type mismatch on 32bit") - return pytest.param(param, marks=marks) - - -@pytest.fixture(params=[skipif_32bit(1), skipif_32bit(2), 10]) +@pytest.fixture(params=[1, 2, 10]) def leaf_size(request): """ Fixture to specify IntervalTree leaf_size parameter; to be used with the @@ -120,9 +107,7 @@ def test_duplicates(self, dtype): expected = np.array([], dtype="intp") tm.assert_numpy_array_equal(result, expected) - @pytest.mark.parametrize( - "leaf_size", [skipif_32bit(1), skipif_32bit(10), skipif_32bit(100), 10000] - ) + @pytest.mark.parametrize("leaf_size", [1, 10, 100, 10000]) def test_get_indexer_closed(self, closed, leaf_size): x = np.arange(1000, dtype="float64") found = x.astype("intp") @@ -178,7 +163,6 @@ def test_is_overlapping_trivial(self, closed, left, right): tree = IntervalTree(left, right, closed=closed) assert tree.is_overlapping is False - @pytest.mark.skipif(not IS64, reason="GH 23440") def test_construction_overflow(self): # GH 25485 left, right = np.arange(101, dtype="int64"), [np.iinfo(np.int64).max] * 101 @@ -189,7 +173,6 @@ def test_construction_overflow(self): expected = (50 + np.iinfo(np.int64).max) / 2 assert result == expected - @pytest.mark.xfail(WASM, reason="GH 23440") @pytest.mark.parametrize( "left, right, expected", [ diff --git a/pandas/tests/indexing/interval/test_interval.py b/pandas/tests/indexing/interval/test_interval.py index 3bab48aad8db2..9338647d0eb93 100644 --- a/pandas/tests/indexing/interval/test_interval.py +++ b/pandas/tests/indexing/interval/test_interval.py @@ -2,7 +2,6 @@ import pytest from pandas._libs import index as libindex -from pandas.compat import WASM import pandas as pd from pandas import ( @@ -210,7 +209,6 @@ def test_mi_intervalindex_slicing_with_scalar(self): expected = Series([1, 6, 2, 8, 7], index=expected_index, name="value") tm.assert_series_equal(result, expected) - @pytest.mark.xfail(WASM, reason="GH 23440") @pytest.mark.parametrize("base", [101, 1010]) def test_reindex_behavior_with_interval_index(self, base): # GH 51826 diff --git a/pandas/tests/indexing/interval/test_interval_new.py b/pandas/tests/indexing/interval/test_interval_new.py index 051dc7b98f2aa..2b670cb1be6fe 100644 --- a/pandas/tests/indexing/interval/test_interval_new.py +++ b/pandas/tests/indexing/interval/test_interval_new.py @@ -3,8 +3,6 @@ import numpy as np import pytest -from pandas.compat import WASM - from pandas import ( Index, Interval, @@ -214,7 +212,6 @@ def test_loc_getitem_missing_key_error_message( obj.loc[[4, 5, 6]] -@pytest.mark.xfail(WASM, reason="GH 23440") @pytest.mark.parametrize( "intervals", [ diff --git a/pandas/tests/reshape/test_cut.py b/pandas/tests/reshape/test_cut.py index a94d6448c94b0..bbc437f83d139 100644 --- a/pandas/tests/reshape/test_cut.py +++ b/pandas/tests/reshape/test_cut.py @@ -861,3 +861,17 @@ def test_cut_datetime_array_no_attributeerror(): tm.assert_categorical_equal( result, expected, check_dtype=True, check_category_order=True ) + + +def test_cut_int64_intervalindex_more_bins_than_leaf_size(): + # GH#44075 building the IntervalTree engine for >100 integer bins used to + # raise on 32-bit platforms (int64 indices could not be safely cast to + # intp inside PyArray_Take). + bins = IntervalIndex.from_breaks( + range(0, 102, 1), closed="left", dtype="interval[int64]" + ) + data = [1.2, np.nan, 10.2] + result = cut(data, bins) + + expected_codes = np.array([1, -1, 10], dtype=result.codes.dtype) + tm.assert_numpy_array_equal(result.codes, expected_codes)