Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v3.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -302,7 +302,7 @@ Strings
Interval
^^^^^^^^
- Bug in :class:`IntervalArray` and :class:`IntervalIndex` constructors unnecessarily upcasting sub-64-bit numeric dtypes (e.g. ``float32``, ``int32``) to 64-bit (:issue:`45412`)
-
- Bug in :func:`cut` and other operations building an :class:`IntervalIndex` engine raising ``TypeError`` on 32-bit platforms when there were more than 100 intervals (:issue:`44075`, :issue:`23440`)

Indexing
^^^^^^^^
Expand Down
5 changes: 4 additions & 1 deletion pandas/_libs/intervaltree.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,10 @@ cdef class IntervalTree(IntervalMixin):
cdef take(ndarray source, ndarray indices):
    """Take the given positions from a 1D ndarray

    Parameters
    ----------
    source : ndarray
        Array to take values from.
    indices : ndarray
        Positions to take; converted to ``intp`` before the take.

    Returns
    -------
    The result of ``PyArray_Take`` on ``source`` along axis 0.
    """
    # GH#23440, GH#44075: the positions we build are int64, but PyArray_Take
    # requires intp indices and rejects the int64->int32 "safe" cast on 32-bit
    # platforms. On 64-bit intp is int64, so this is a no-op there.
    return PyArray_Take(source, indices.astype(np.intp, copy=False), 0)


cdef sort_values_and_indices(all_values, all_indices, subset):
Expand Down
21 changes: 2 additions & 19 deletions pandas/tests/indexes/interval/test_interval_tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,24 +4,11 @@
import pytest

from pandas._libs.interval import IntervalTree
from pandas.compat import (
IS64,
WASM,
)

import pandas._testing as tm


def skipif_32bit(param):
    """
    Wrap a parametrize value so that it is skipped when not running on a
    64-bit build. Used for the leaf_size parameters tied to GH 23440.
    """
    return pytest.param(
        param,
        marks=pytest.mark.skipif(
            not IS64, reason="GH 23440: int type mismatch on 32bit"
        ),
    )


@pytest.fixture(params=[skipif_32bit(1), skipif_32bit(2), 10])
@pytest.fixture(params=[1, 2, 10])
def leaf_size(request):
"""
Fixture to specify IntervalTree leaf_size parameter; to be used with the
Expand Down Expand Up @@ -120,9 +107,7 @@ def test_duplicates(self, dtype):
expected = np.array([], dtype="intp")
tm.assert_numpy_array_equal(result, expected)

@pytest.mark.parametrize(
"leaf_size", [skipif_32bit(1), skipif_32bit(10), skipif_32bit(100), 10000]
)
@pytest.mark.parametrize("leaf_size", [1, 10, 100, 10000])
def test_get_indexer_closed(self, closed, leaf_size):
x = np.arange(1000, dtype="float64")
found = x.astype("intp")
Expand Down Expand Up @@ -178,7 +163,6 @@ def test_is_overlapping_trivial(self, closed, left, right):
tree = IntervalTree(left, right, closed=closed)
assert tree.is_overlapping is False

@pytest.mark.skipif(not IS64, reason="GH 23440")
def test_construction_overflow(self):
# GH 25485
left, right = np.arange(101, dtype="int64"), [np.iinfo(np.int64).max] * 101
Expand All @@ -189,7 +173,6 @@ def test_construction_overflow(self):
expected = (50 + np.iinfo(np.int64).max) / 2
assert result == expected

@pytest.mark.xfail(WASM, reason="GH 23440")
@pytest.mark.parametrize(
"left, right, expected",
[
Expand Down
2 changes: 0 additions & 2 deletions pandas/tests/indexing/interval/test_interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
import pytest

from pandas._libs import index as libindex
from pandas.compat import WASM

import pandas as pd
from pandas import (
Expand Down Expand Up @@ -210,7 +209,6 @@ def test_mi_intervalindex_slicing_with_scalar(self):
expected = Series([1, 6, 2, 8, 7], index=expected_index, name="value")
tm.assert_series_equal(result, expected)

@pytest.mark.xfail(WASM, reason="GH 23440")
@pytest.mark.parametrize("base", [101, 1010])
def test_reindex_behavior_with_interval_index(self, base):
# GH 51826
Expand Down
3 changes: 0 additions & 3 deletions pandas/tests/indexing/interval/test_interval_new.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@
import numpy as np
import pytest

from pandas.compat import WASM

from pandas import (
Index,
Interval,
Expand Down Expand Up @@ -214,7 +212,6 @@ def test_loc_getitem_missing_key_error_message(
obj.loc[[4, 5, 6]]


@pytest.mark.xfail(WASM, reason="GH 23440")
@pytest.mark.parametrize(
"intervals",
[
Expand Down
14 changes: 14 additions & 0 deletions pandas/tests/reshape/test_cut.py
Original file line number Diff line number Diff line change
Expand Up @@ -861,3 +861,17 @@ def test_cut_datetime_array_no_attributeerror():
tm.assert_categorical_equal(
result, expected, check_dtype=True, check_category_order=True
)


def test_cut_int64_intervalindex_more_bins_than_leaf_size():
# GH#44075 building the IntervalTree engine for >100 integer bins used to
# raise on 32-bit platforms (int64 indices could not be safely cast to
# intp inside PyArray_Take).
bins = IntervalIndex.from_breaks(
range(0, 102, 1), closed="left", dtype="interval[int64]"
)
data = [1.2, np.nan, 10.2]
result = cut(data, bins)

expected_codes = np.array([1, -1, 10], dtype=result.codes.dtype)
tm.assert_numpy_array_equal(result.codes, expected_codes)