From 8e0213dbe8ca7881e683c8cec14959dc334d46d9 Mon Sep 17 00:00:00 2001 From: Anton <100830759+antonwolfy@users.noreply.github.com> Date: Tue, 27 Jan 2026 11:57:00 +0100 Subject: [PATCH 01/18] Remove `in1d` stub from documentation (#2744) The PR removes TODO implementation stub for `in1d`, because it was dropped in NumPy 2.4 release and so no plans to add it to dpnp either. Also this PR refreshes third party tests with recent updates in the remote repo. --- doc/reference/set.rst | 1 - .../cupy/core_tests/test_elementwise.py | 62 ++-------- .../cupy/core_tests/test_function.py | 5 +- .../cupy/core_tests/test_include.py | 28 +++-- .../core_tests/test_ndarray_complex_ops.py | 10 +- .../core_tests/test_ndarray_contiguity.py | 4 +- .../core_tests/test_ndarray_conversion.py | 67 +++++----- .../cupy/core_tests/test_ndarray_owndata.py | 6 +- .../third_party/cupy/core_tests/test_scan.py | 8 +- .../cupy/core_tests/test_syncdetect.py | 5 +- .../cupy/random_tests/test_distributions.py | 114 ++++++++++++++++++ .../cupy/random_tests/test_generator.py | 2 +- .../cupy/statistics_tests/test_order.py | 20 +++ 13 files changed, 219 insertions(+), 113 deletions(-) diff --git a/doc/reference/set.rst b/doc/reference/set.rst index 702e58bb87d5..73719fba74bc 100644 --- a/doc/reference/set.rst +++ b/doc/reference/set.rst @@ -23,7 +23,6 @@ Boolean operations :toctree: generated/ :nosignatures: - in1d intersect1d isin setdiff1d diff --git a/dpnp/tests/third_party/cupy/core_tests/test_elementwise.py b/dpnp/tests/third_party/cupy/core_tests/test_elementwise.py index 729468948209..b2d6e65cd37a 100644 --- a/dpnp/tests/third_party/cupy/core_tests/test_elementwise.py +++ b/dpnp/tests/third_party/cupy/core_tests/test_elementwise.py @@ -1,4 +1,4 @@ -import unittest +from __future__ import annotations import numpy import pytest @@ -12,7 +12,7 @@ from dpnp.tests.third_party.cupy import testing -class TestElementwise(unittest.TestCase): +class TestElementwise: def check_copy(self, dtype, src_id, 
dst_id): with cuda.Device(src_id): @@ -33,7 +33,7 @@ def test_copy(self, dtype): def test_copy_multigpu_nopeer(self, dtype): if cuda.runtime.deviceCanAccessPeer(0, 1) == 1: pytest.skip("peer access is available") - with self.assertRaises(ValueError): + with pytest.raises(ValueError): self.check_copy(dtype, 0, 1) @pytest.mark.skip("elementwise_copy() argument isn't supported") @@ -74,10 +74,10 @@ def test_copy_orders(self, order): @pytest.mark.skip("`ElementwiseKernel` isn't supported") -class TestElementwiseInvalidShape(unittest.TestCase): +class TestElementwiseInvalidShape: def test_invalid_shape(self): - with self.assertRaisesRegex(ValueError, "Out shape is mismatched"): + with pytest.raises(ValueError, match="Out shape is mismatched"): f = cupy.ElementwiseKernel("T x", "T y", "y += x") x = cupy.arange(12).reshape(3, 4) y = cupy.arange(4) @@ -85,16 +85,15 @@ def test_invalid_shape(self): @pytest.mark.skip("`ElementwiseKernel` isn't supported") -class TestElementwiseInvalidArgument(unittest.TestCase): +class TestElementwiseInvalidArgument: def test_invalid_kernel_name(self): - with self.assertRaisesRegex(ValueError, "Invalid kernel name"): + with pytest.raises(ValueError, match="Invalid kernel name"): cupy.ElementwiseKernel("T x", "", "", "1") -class TestElementwiseType(unittest.TestCase): +class TestElementwiseType: - @testing.with_requires("numpy>=2.0") @testing.for_int_dtypes(no_bool=True) @testing.numpy_cupy_array_equal(accept_error=OverflowError) def test_large_int_upper_1(self, xp, dtype): @@ -105,14 +104,6 @@ def test_large_int_upper_1(self, xp, dtype): @testing.for_int_dtypes(no_bool=True) @testing.numpy_cupy_array_equal(accept_error=OverflowError) def test_large_int_upper_2(self, xp, dtype): - if numpy_version() < "2.0.0": - flag = dtype in [xp.int16, xp.int32, xp.int64, xp.longlong] - if xp.issubdtype(dtype, xp.unsignedinteger) or flag: - pytest.skip("numpy doesn't raise OverflowError") - - if dtype in [xp.int8, xp.intc] and is_win_platform(): - 
pytest.skip("numpy promotes dtype differently") - a = xp.array([1], dtype=xp.int8) b = xp.iinfo(dtype).max - 1 return a + b @@ -121,48 +112,31 @@ def test_large_int_upper_2(self, xp, dtype): @testing.numpy_cupy_array_equal() def test_large_int_upper_3(self, xp, dtype): if ( - numpy.issubdtype(dtype, numpy.unsignedinteger) - and numpy_version() < "2.0.0" - ): - pytest.skip("numpy promotes dtype differently") - elif ( dtype in (numpy.uint64, numpy.ulonglong) and not has_support_aspect64() ): pytest.skip("no fp64 support") a = xp.array([xp.iinfo(dtype).max], dtype=dtype) - b = numpy.int8(0) + b = xp.int8(0) return a + b @testing.for_int_dtypes(no_bool=True) @testing.numpy_cupy_array_equal() def test_large_int_upper_4(self, xp, dtype): if ( - numpy.issubdtype(dtype, numpy.unsignedinteger) - and numpy_version() < "2.0.0" - ): - pytest.skip("numpy promotes dtype differently") - elif ( dtype in (numpy.uint64, numpy.ulonglong) and not has_support_aspect64() ): pytest.skip("no fp64 support") a = xp.array([xp.iinfo(dtype).max - 1], dtype=dtype) - b = numpy.int8(1) + b = xp.int8(1) return a + b @testing.for_int_dtypes(no_bool=True) @testing.numpy_cupy_array_equal(accept_error=OverflowError) def test_large_int_lower_1(self, xp, dtype): - if numpy_version() < "2.0.0": - if dtype in [xp.int16, xp.int32, xp.int64, xp.longlong]: - pytest.skip("numpy doesn't raise OverflowError") - - if dtype in [xp.int8, xp.intc] and is_win_platform(): - pytest.skip("numpy promotes dtype differently") - a = xp.array([0], dtype=xp.int8) b = xp.iinfo(dtype).min return a + b @@ -170,13 +144,6 @@ def test_large_int_lower_1(self, xp, dtype): @testing.for_int_dtypes(no_bool=True) @testing.numpy_cupy_array_equal(accept_error=OverflowError) def test_large_int_lower_2(self, xp, dtype): - if numpy_version() < "2.0.0": - if dtype in [xp.int16, xp.int32, xp.int64, xp.longlong]: - pytest.skip("numpy doesn't raise OverflowError") - - if dtype in [xp.int8, xp.intc] and is_win_platform(): - pytest.skip("numpy 
promotes dtype differently") - a = xp.array([-1], dtype=xp.int8) b = xp.iinfo(dtype).min + 1 return a + b @@ -185,18 +152,13 @@ def test_large_int_lower_2(self, xp, dtype): @testing.numpy_cupy_array_equal() def test_large_int_lower_3(self, xp, dtype): if ( - numpy.issubdtype(dtype, numpy.unsignedinteger) - and numpy_version() < "2.0.0" - ): - pytest.skip("numpy promotes dtype differently") - elif ( dtype in (numpy.uint64, numpy.ulonglong) and not has_support_aspect64() ): pytest.skip("no fp64 support") a = xp.array([xp.iinfo(dtype).min], dtype=dtype) - b = numpy.int8(0) + b = xp.int8(0) return a + b @testing.for_int_dtypes(no_bool=True) @@ -209,5 +171,5 @@ def test_large_int_lower_4(self, xp, dtype): pytest.skip("no fp64 support") a = xp.array([xp.iinfo(dtype).min + 1], dtype=dtype) - b = numpy.int8(-1) + b = xp.int8(-1) return a + b diff --git a/dpnp/tests/third_party/cupy/core_tests/test_function.py b/dpnp/tests/third_party/cupy/core_tests/test_function.py index 5480cdf6e126..1fa4fdbac46a 100644 --- a/dpnp/tests/third_party/cupy/core_tests/test_function.py +++ b/dpnp/tests/third_party/cupy/core_tests/test_function.py @@ -1,7 +1,6 @@ from __future__ import annotations -import unittest - +import numpy import pytest import dpnp as cupy @@ -23,7 +22,7 @@ def _compile_func(kernel_name, code): return mod.get_function(kernel_name) -class TestFunction(unittest.TestCase): +class TestFunction: def test_python_scalar(self): code = """ diff --git a/dpnp/tests/third_party/cupy/core_tests/test_include.py b/dpnp/tests/third_party/cupy/core_tests/test_include.py index a45d2b40cbf4..1e738f7977bf 100644 --- a/dpnp/tests/third_party/cupy/core_tests/test_include.py +++ b/dpnp/tests/third_party/cupy/core_tests/test_include.py @@ -1,5 +1,6 @@ +from __future__ import annotations + import os -from unittest import mock import pytest @@ -71,22 +72,23 @@ def test_nvcc(self): _code_nvcc, options=options, arch=arch ) - def test_nvrtc(self): + def test_nvrtc(self, monkeypatch): cuda_ver = 
cupy.cuda.runtime.runtimeGetVersion() options = self._get_options() for arch in self._get_cuda_archs(): - with mock.patch( - "cupy.cuda.compiler._get_arch_for_options_for_nvrtc", + monkeypatch.setattr( + cupy.cuda.compiler, + "_get_arch_for_options_for_nvrtc", lambda _: (f"-arch=compute_{arch}", "ptx"), - ): + ) + cupy.cuda.compiler.compile_using_nvrtc(_code_nvrtc, options=options) + + if cuda_ver >= 11010: + monkeypatch.setattr( + cupy.cuda.compiler, + "_get_arch_for_options_for_nvrtc", + lambda _: (f"-arch=sm_{arch}", "cubin"), + ) cupy.cuda.compiler.compile_using_nvrtc( _code_nvrtc, options=options ) - if cuda_ver >= 11010: - with mock.patch( - "cupy.cuda.compiler._get_arch_for_options_for_nvrtc", - lambda _: (f"-arch=sm_{arch}", "cubin"), - ): - cupy.cuda.compiler.compile_using_nvrtc( - _code_nvrtc, options=options - ) diff --git a/dpnp/tests/third_party/cupy/core_tests/test_ndarray_complex_ops.py b/dpnp/tests/third_party/cupy/core_tests/test_ndarray_complex_ops.py index 3acebaaeb3ad..a2fe0e2f256c 100644 --- a/dpnp/tests/third_party/cupy/core_tests/test_ndarray_complex_ops.py +++ b/dpnp/tests/third_party/cupy/core_tests/test_ndarray_complex_ops.py @@ -1,4 +1,4 @@ -import unittest +from __future__ import annotations import numpy import pytest @@ -7,7 +7,7 @@ from dpnp.tests.third_party.cupy import testing -class TestConj(unittest.TestCase): +class TestConj: @testing.for_all_dtypes() @testing.numpy_cupy_array_almost_equal() @@ -38,7 +38,7 @@ def test_conjugate_pass(self, xp, dtype): return y -class TestAngle(unittest.TestCase): +class TestAngle: # For dtype=int8, uint8, NumPy returns float16, but dpnp returns float32 # so type_check=False @@ -49,7 +49,7 @@ def test_angle(self, xp, dtype): return xp.angle(x) -class TestRealImag(unittest.TestCase): +class TestRealImag: @testing.for_all_dtypes() @testing.numpy_cupy_array_almost_equal(accept_error=False) @@ -157,7 +157,7 @@ def test_imag_inplace(self, dtype): assert cupy.all(x == expected) -class 
TestScalarConversion(unittest.TestCase): +class TestScalarConversion: @testing.for_all_dtypes() def test_scalar_conversion(self, dtype): diff --git a/dpnp/tests/third_party/cupy/core_tests/test_ndarray_contiguity.py b/dpnp/tests/third_party/cupy/core_tests/test_ndarray_contiguity.py index 7331105f3b7b..f28364481c9c 100644 --- a/dpnp/tests/third_party/cupy/core_tests/test_ndarray_contiguity.py +++ b/dpnp/tests/third_party/cupy/core_tests/test_ndarray_contiguity.py @@ -1,9 +1,9 @@ -import unittest +from __future__ import annotations from dpnp.tests.third_party.cupy import testing -class TestArrayContiguity(unittest.TestCase): +class TestArrayContiguity: def test_is_contiguous(self): a = testing.shaped_arange((2, 3, 4)) diff --git a/dpnp/tests/third_party/cupy/core_tests/test_ndarray_conversion.py b/dpnp/tests/third_party/cupy/core_tests/test_ndarray_conversion.py index efd888094c94..4643dd2a60bc 100644 --- a/dpnp/tests/third_party/cupy/core_tests/test_ndarray_conversion.py +++ b/dpnp/tests/third_party/cupy/core_tests/test_ndarray_conversion.py @@ -1,56 +1,69 @@ from __future__ import annotations -import unittest - import numpy import pytest import dpnp as cupy + +# from cupy.cuda import runtime from dpnp.tests.third_party.cupy import testing -@testing.parameterize( - {"shape": ()}, - {"shape": (1,)}, - {"shape": (1, 1, 1)}, +@pytest.mark.parametrize( + "shape", + [ + (), + (1,), + (1, 1, 1), + ], ) -class TestNdarrayItem(unittest.TestCase): +class TestNdarrayItem: @testing.for_all_dtypes() @testing.numpy_cupy_equal() - def test_item(self, xp, dtype): - a = xp.full(self.shape, 3, dtype=dtype) + def test_item(self, xp, dtype, shape): + a = xp.full(shape, 3, dtype=dtype) return a.item() -@testing.parameterize( - {"shape": (0,)}, - {"shape": (2, 3)}, - {"shape": (1, 0, 1)}, +@pytest.mark.parametrize( + "shape", + [ + (0,), + (2, 3), + (1, 0, 1), + ], ) -class TestNdarrayItemRaise(unittest.TestCase): +class TestNdarrayItemRaise: - def test_item(self): + def 
test_item(self, shape): for xp in (numpy, cupy): - a = testing.shaped_arange(self.shape, xp, xp.float32) + a = testing.shaped_arange(shape, xp, xp.float32) with pytest.raises(ValueError): a.item() -@testing.parameterize( - {"shape": ()}, - {"shape": (1,)}, - {"shape": (2, 3)}, - {"shape": (2, 3), "order": "C"}, - {"shape": (2, 3), "order": "F"}, +@pytest.mark.parametrize( + "shape, order", + [ + ((), None), + ((1,), None), + ((2, 3), None), + ((2, 3), "C"), + ((2, 3), "F"), + ], ) -class TestNdarrayToBytes(unittest.TestCase): +class TestNdarrayToBytes: @testing.for_all_dtypes() @testing.numpy_cupy_equal() - def test_item(self, xp, dtype): - a = testing.shaped_arange(self.shape, xp, dtype) - if hasattr(self, "order"): - return a.tobytes(self.order) + def test_item(self, xp, dtype, shape, order): + # if runtime.is_hip and ( + # shape == (1,) or (shape == (2, 3) and order is None) + # ): + # pytest.xfail("ROCm/HIP may have a bug") + a = testing.shaped_arange(shape, xp, dtype) + if order is not None: + return a.tobytes(order) else: return a.tobytes() diff --git a/dpnp/tests/third_party/cupy/core_tests/test_ndarray_owndata.py b/dpnp/tests/third_party/cupy/core_tests/test_ndarray_owndata.py index dee220ab01fa..c447f52a8e39 100644 --- a/dpnp/tests/third_party/cupy/core_tests/test_ndarray_owndata.py +++ b/dpnp/tests/third_party/cupy/core_tests/test_ndarray_owndata.py @@ -1,4 +1,4 @@ -import unittest +from __future__ import annotations import pytest @@ -7,9 +7,9 @@ pytest.skip("owndata attribute is not supported", allow_module_level=True) -class TestArrayOwndata(unittest.TestCase): +class TestArrayOwndata: - def setUp(self): + def setup_method(self): self.a = _core.ndarray(()) def test_original_array(self): diff --git a/dpnp/tests/third_party/cupy/core_tests/test_scan.py b/dpnp/tests/third_party/cupy/core_tests/test_scan.py index 15eb0a653b16..111e4d5490f3 100644 --- a/dpnp/tests/third_party/cupy/core_tests/test_scan.py +++ 
b/dpnp/tests/third_party/cupy/core_tests/test_scan.py @@ -1,4 +1,4 @@ -import unittest +from __future__ import annotations import pytest @@ -11,12 +11,11 @@ pytest.skip("scan() is not supported", allow_module_level=True) -class TestScan(unittest.TestCase): +class TestScan: @testing.for_all_dtypes() def test_scan(self, dtype): element_num = 10000 - if dtype in {cupy.int8, cupy.uint8, cupy.float16}: element_num = 100 @@ -27,7 +26,7 @@ def test_scan(self, dtype): testing.assert_array_equal(prefix_sum, expect) def test_check_1d_array(self): - with self.assertRaises(TypeError): + with pytest.raises(TypeError): a = cupy.zeros((2, 2)) scan(a) @@ -43,7 +42,6 @@ def test_multi_gpu(self): @testing.for_all_dtypes() def test_scan_out(self, dtype): element_num = 10000 - if dtype in {cupy.int8, cupy.uint8, cupy.float16}: element_num = 100 diff --git a/dpnp/tests/third_party/cupy/core_tests/test_syncdetect.py b/dpnp/tests/third_party/cupy/core_tests/test_syncdetect.py index 57f64a7b1661..855433f53e3d 100644 --- a/dpnp/tests/third_party/cupy/core_tests/test_syncdetect.py +++ b/dpnp/tests/third_party/cupy/core_tests/test_syncdetect.py @@ -1,4 +1,4 @@ -import unittest +from __future__ import annotations import pytest @@ -9,8 +9,7 @@ pytest.skip("get() method is not supported", allow_module_level=True) -class TestSyncDetect(unittest.TestCase): - +class TestSyncDetect: def test_disallowed(self): a = cupy.array([2, 3]) with cupyx.allow_synchronize(False): diff --git a/dpnp/tests/third_party/cupy/random_tests/test_distributions.py b/dpnp/tests/third_party/cupy/random_tests/test_distributions.py index ebed860fd294..b72edc4c90c9 100644 --- a/dpnp/tests/third_party/cupy/random_tests/test_distributions.py +++ b/dpnp/tests/third_party/cupy/random_tests/test_distributions.py @@ -1,3 +1,7 @@ +from __future__ import annotations + +import concurrent.futures + import numpy import pytest @@ -1000,3 +1004,113 @@ class TestDistributionsZipf(RandomDistributionsTestCase): def test_zipf(self, a_dtype): 
a = numpy.full(self.a_shape, 2, dtype=a_dtype) self.check_distribution("zipf", {"a": a}) + + +@pytest.mark.parametrize( + "dist_func", + [ + # pytest.param(lambda rs: rs.beta(3.0, 3.0, size=10), id="beta"), + # pytest.param(lambda rs: rs.binomial(5, 0.5, size=10), id="binomial"), + # pytest.param(lambda rs: rs.chisquare(5.0, size=10), id="chisquare"), + # pytest.param( + # lambda rs: rs.dirichlet([1.0, 1.0, 1.0], size=10), id="dirichlet" + # ), + # pytest.param(lambda rs: rs.exponential(1.0, size=10), id="exponential"), + # pytest.param(lambda rs: rs.f(5.0, 5.0, size=10), id="f"), + # pytest.param(lambda rs: rs.gamma(5.0, 1.0, size=10), id="gamma"), + # pytest.param(lambda rs: rs.geometric(0.5, size=10), id="geometric"), + # pytest.param(lambda rs: rs.gumbel(0.0, 1.0, size=10), id="gumbel"), + # pytest.param( + # lambda rs: rs.hypergeometric(10, 10, 5, size=10), + # id="hypergeometric", + # ), + # pytest.param(lambda rs: rs.laplace(0.0, 1.0, size=10), id="laplace"), + # pytest.param(lambda rs: rs.logistic(0.0, 1.0, size=10), id="logistic"), + # pytest.param( + # lambda rs: rs.lognormal(0.0, 1.0, size=10), id="lognormal" + # ), + # pytest.param(lambda rs: rs.logseries(0.5, size=10), id="logseries"), + # pytest.param( + # lambda rs: rs.multivariate_normal( + # [0.0, 0.0], [[1.0, 0.0], [0.0, 1.0]], size=10 + # ), + # id="multivariate_normal", + # ), + # pytest.param( + # lambda rs: rs.negative_binomial(5, 0.5, size=10), + # id="negative_binomial", + # ), + # pytest.param( + # lambda rs: rs.noncentral_chisquare(5.0, 1.0, size=10), + # id="noncentral_chisquare", + # ), + # pytest.param( + # lambda rs: rs.noncentral_f(5.0, 5.0, 1.0, size=10), + # id="noncentral_f", + # ), + pytest.param(lambda rs: rs.normal(0.0, 1.0, size=10), id="normal"), + # pytest.param(lambda rs: rs.pareto(3.0, size=10), id="pareto"), + # pytest.param(lambda rs: rs.poisson(5.0, size=10), id="poisson"), + # pytest.param(lambda rs: rs.power(0.5, size=10), id="power"), + pytest.param(lambda rs: 
rs.random_sample(size=10), id="random_sample"), + # pytest.param(lambda rs: rs.rayleigh(1.0, size=10), id="rayleigh"), + # pytest.param( + # lambda rs: rs.standard_cauchy(size=10), id="standard_cauchy" + # ), + # pytest.param( + # lambda rs: rs.standard_exponential(size=10), + # id="standard_exponential", + # ), + # pytest.param( + # lambda rs: rs.standard_gamma(5.0, size=10), id="standard_gamma" + # ), + pytest.param( + lambda rs: rs.standard_normal(size=10), id="standard_normal" + ), + # pytest.param(lambda rs: rs.standard_t(5.0, size=10), id="standard_t"), + # pytest.param( + # lambda rs: rs.triangular(-1.0, 0.0, 2.0, size=10), id="triangular" + # ), + pytest.param(lambda rs: rs.uniform(0.0, 1.0, size=10), id="uniform"), + # pytest.param(lambda rs: rs.vonmises(0.0, 1.0, size=10), id="vonmises"), + # pytest.param(lambda rs: rs.wald(3.0, 3.0, size=10), id="wald"), + # pytest.param(lambda rs: rs.weibull(1.0, size=10), id="weibull"), + # pytest.param(lambda rs: rs.zipf(2.0, size=10), id="zipf"), + # # Integers and shuffles + # pytest.param(lambda rs: rs.choice(100, size=10), id="choice-number"), + # pytest.param( + # lambda rs: rs.choice(cupy.arange(10), size=10), id="choice-array" + # ), + # pytest.param(lambda rs: rs.tomaxint(size=10), id="tomaxint"), + # # skipping shuffle (doesn't fit lambda and uses permutation) + # pytest.param( + # lambda rs: rs.permutation(cupy.arange(20)), id="permutation" + # ), + pytest.param(lambda rs: rs.randint(0, 10, size=10), id="randint"), + pytest.param(lambda rs: rs.randn(10), id="randn"), + ], +) +# @pytest.mark.thread_unsafe("already multi-threaded") +def test_multithreaded(dist_func): + n_threads = 10 + rs = cupy.random.RandomState(seed=0) + + def call_distribution(_): + return dist_func(rs) + + # Run distribution in multiple threads with shared RandomState + with concurrent.futures.ThreadPoolExecutor( + max_workers=n_threads + ) as executor: + results = executor.map(call_distribution, range(n_threads)) + + results = 
list(results) + + # Check that all results are finite + for result in results: + assert cupy.isfinite(result).all() + + # Check that all results are different from each other + for i in range(len(results)): + for j in range(i + 1, len(results)): + assert not cupy.array_equal(results[i], results[j]) diff --git a/dpnp/tests/third_party/cupy/random_tests/test_generator.py b/dpnp/tests/third_party/cupy/random_tests/test_generator.py index abb58df07af9..23a86d88d8ff 100644 --- a/dpnp/tests/third_party/cupy/random_tests/test_generator.py +++ b/dpnp/tests/third_party/cupy/random_tests/test_generator.py @@ -917,7 +917,7 @@ def test_dtype_shape(self): if isinstance(self.a, numpy.ndarray): expected_dtype = "float" else: - expected_dtype = "long" + expected_dtype = "int64" assert v.dtype == expected_dtype assert v.shape == expected_shape diff --git a/dpnp/tests/third_party/cupy/statistics_tests/test_order.py b/dpnp/tests/third_party/cupy/statistics_tests/test_order.py index 58eb7999acc7..f35617e18619 100644 --- a/dpnp/tests/third_party/cupy/statistics_tests/test_order.py +++ b/dpnp/tests/third_party/cupy/statistics_tests/test_order.py @@ -28,6 +28,25 @@ ) +@pytest.fixture +def _fix_gamma(monkeypatch): + if numpy.__version__ == "2.4.1": + # NumPy 2.4.0 had a surprisingly large change, but I (seberg) + # incorrectly undid the change, making things maybe worse... + # this fixes that... 
+ # See also https://github.com/numpy/numpy/pull/30710 + def _get_gamma(virtual_indexes, previous_indexes, method): + gamma = numpy.asanyarray(virtual_indexes - previous_indexes) + gamma = method["fix_gamma"](gamma, virtual_indexes) + return numpy.asanyarray(gamma, dtype=virtual_indexes.dtype) + + monkeypatch.setattr( + numpy.lib._function_base_impl, "_get_gamma", _get_gamma + ) + + yield + + def for_all_methods(name="method"): return pytest.mark.parametrize(name, _all_methods) @@ -83,6 +102,7 @@ def test_quantile_unexpected_method(self, dtype): @pytest.mark.skip("dpnp.quantile() is not implemented yet") +@pytest.mark.usefixtures("_fix_gamma") @testing.with_requires("numpy>=2.0") @for_all_methods() class TestQuantileMethods: From b910c9237c32fb0e835d426dcd214d7af68891c3 Mon Sep 17 00:00:00 2001 From: Anton <100830759+antonwolfy@users.noreply.github.com> Date: Tue, 27 Jan 2026 15:39:00 +0100 Subject: [PATCH 02/18] Improve description of `dpnp.fromfile` (#2745) The PR aligns with recent change in NumPy and adds more clarity on the description of the `file` positional argument on `dpnp.fromfile` documentation. 
--- CHANGELOG.md | 1 + dpnp/dpnp_iface_arraycreation.py | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 69b06cb64bf8..24a4b9296aa4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -45,6 +45,7 @@ Also, that release drops support for Python 3.9, making Python 3.10 the minimum * Updated `dpnp.fix` to reuse `dpnp.trunc` internally [#2722](https://github.com/IntelPython/dpnp/pull/2722) * Changed the build scripts and documentation due to `python setup.py develop` deprecation notice [#2716](https://github.com/IntelPython/dpnp/pull/2716) * Clarified behavior on repeated `axes` in `dpnp.tensordot` and `dpnp.linalg.tensordot` functions [#2733](https://github.com/IntelPython/dpnp/pull/2733) +* Improved documentation of `file` argument in `dpnp.fromfile` [#2745](https://github.com/IntelPython/dpnp/pull/2745) ### Deprecated diff --git a/dpnp/dpnp_iface_arraycreation.py b/dpnp/dpnp_iface_arraycreation.py index 8d4ebdd1a6c2..539df84d3638 100644 --- a/dpnp/dpnp_iface_arraycreation.py +++ b/dpnp/dpnp_iface_arraycreation.py @@ -1721,7 +1721,9 @@ def fromfile( Parameters ---------- file : file or str or Path - Open file object or filename. + An open file object, a string containing the filename, or a Path object. + When reading from a file object it must support random access (i.e. it + must have tell and seek methods). dtype : {None, str, dtype object}, optional Data type of the returned array. For binary files, it is used to determine the size and byte-order From 869ccb899e5962a0bd1535b052d4fcbafb6f5e9f Mon Sep 17 00:00:00 2001 From: Anton <100830759+antonwolfy@users.noreply.github.com> Date: Wed, 28 Jan 2026 12:24:04 +0100 Subject: [PATCH 03/18] Increase timeout to allow to complete Windows tests without rerunning (#2748) The PR updates GitHub action with testing dpnp conda package. 
--- .github/workflows/conda-package.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/conda-package.yml b/.github/workflows/conda-package.yml index 2e5539c8708d..d2ac90621aaa 100644 --- a/.github/workflows/conda-package.yml +++ b/.github/workflows/conda-package.yml @@ -20,7 +20,7 @@ env: test-env-name: 'test' rerun-tests-on-failure: 'true' rerun-tests-max-attempts: 2 - rerun-tests-timeout: 40 + rerun-tests-timeout: 45 jobs: build: From 258b3cd78144431c0769e87087eb2f32fdc3f2f4 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Sat, 31 Jan 2026 13:22:32 +0100 Subject: [PATCH 04/18] Weekly pre-commit autoupdate (#2750) This PR updates the `.pre-commit-config.yaml` using `pre-commit autoupdate`. --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 50b1175ffebc..ace139f8d179 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -127,7 +127,7 @@ repos: hooks: - id: actionlint - repo: https://github.com/BlankSpruce/gersemi - rev: 0.25.1 + rev: 0.25.4 hooks: - id: gersemi exclude: "dpnp/backend/cmake/Modules/" From d11f5b583beb2c0063333d8963c459dea13b2333 Mon Sep 17 00:00:00 2001 From: Anton <100830759+antonwolfy@users.noreply.github.com> Date: Sat, 31 Jan 2026 15:08:56 +0100 Subject: [PATCH 05/18] Support tuple of integers for `axis` in `dpnp.trim_zeros` (#2746) The PR aligns with NumPy 2.4 change and adds support for tuple of integers passed with `axis` argument in `dpnp.trim_zeros` - [x] Have you provided a meaningful PR description? - [x] Have you added a test, reproducer or referred to an issue with a reproducer? - [x] Have you tested your changes locally for CPU and GPU devices? - [x] Have you made sure that new changes do not introduce compiler warnings? - [ ] Have you checked performance impact of proposed changes? 
- [x] Have you added documentation for your changes, if necessary? - [x] Have you added your changes to the changelog? --- CHANGELOG.md | 1 + dpnp/dpnp_iface_manipulation.py | 24 ++++++------- dpnp/tests/test_manipulation.py | 60 +++++++++++++++++++++++++++++++-- 3 files changed, 71 insertions(+), 14 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 24a4b9296aa4..4a94d41d131c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -46,6 +46,7 @@ Also, that release drops support for Python 3.9, making Python 3.10 the minimum * Changed the build scripts and documentation due to `python setup.py develop` deprecation notice [#2716](https://github.com/IntelPython/dpnp/pull/2716) * Clarified behavior on repeated `axes` in `dpnp.tensordot` and `dpnp.linalg.tensordot` functions [#2733](https://github.com/IntelPython/dpnp/pull/2733) * Improved documentation of `file` argument in `dpnp.fromfile` [#2745](https://github.com/IntelPython/dpnp/pull/2745) +* Aligned `dpnp.trim_zeros` with NumPy 2.4 to support a tuple of integers passed with `axis` keyword [#2746](https://github.com/IntelPython/dpnp/pull/2746) ### Deprecated diff --git a/dpnp/dpnp_iface_manipulation.py b/dpnp/dpnp_iface_manipulation.py index 9df5278bd16b..dd872485a602 100644 --- a/dpnp/dpnp_iface_manipulation.py +++ b/dpnp/dpnp_iface_manipulation.py @@ -3983,7 +3983,7 @@ def trim_zeros(filt, trim="fb", axis=None): (or index -1). Default: ``"fb"``. - axis : {None, int}, optional + axis : {None, int, tuple of ints}, optional If ``None``, `filt` is cropped such that the smallest bounding box is returned that still contains all values which are not zero. 
If an `axis` is specified, `filt` will be sliced in that dimension only @@ -4038,11 +4038,14 @@ def trim_zeros(filt, trim="fb", axis=None): raise ValueError(f"unexpected character(s) in `trim`: {trim!r}") nd = filt.ndim - if axis is not None: - axis = normalize_axis_index(axis, nd) + if axis is None: + axis = tuple(range(nd)) + else: + axis = normalize_axis_tuple(axis, nd, argname="axis") - if filt.size == 0: - return filt # no trailing zeros in empty array + # check if an empty array or no trimming requested + if filt.size == 0 or not axis: + return filt non_zero = dpnp.argwhere(filt) if non_zero.size == 0: @@ -4061,13 +4064,10 @@ def trim_zeros(filt, trim="fb", axis=None): else: stop = (None,) * nd - if axis is None: - # trim all axes - sl = tuple(slice(*x) for x in zip(start, stop)) - else: - # only trim single axis - sl = (slice(None),) * axis + (slice(start[axis], stop[axis]),) + (...,) - + sl = tuple( + slice(start[ax], stop[ax]) if ax in axis else slice(None) + for ax in range(nd) + ) return filt[sl] diff --git a/dpnp/tests/test_manipulation.py b/dpnp/tests/test_manipulation.py index 373817466f5b..82e4640830a8 100644 --- a/dpnp/tests/test_manipulation.py +++ b/dpnp/tests/test_manipulation.py @@ -1432,6 +1432,8 @@ def test_usm_array(self): class TestTrimZeros: + ALL_TRIMS = ["F", "B", "fb"] + @pytest.mark.parametrize("dtype", get_all_dtypes(no_none=True)) def test_basic(self, dtype): a = numpy.array([0, 0, 1, 0, 2, 3, 4, 0], dtype=dtype) @@ -1443,7 +1445,7 @@ def test_basic(self, dtype): @testing.with_requires("numpy>=2.2") @pytest.mark.parametrize("dtype", get_all_dtypes(no_none=True)) - @pytest.mark.parametrize("trim", ["F", "B", "fb"]) + @pytest.mark.parametrize("trim", ALL_TRIMS) @pytest.mark.parametrize("ndim", [0, 1, 2, 3]) def test_basic_nd(self, dtype, trim, ndim): a = numpy.ones((2,) * ndim, dtype=dtype) @@ -1477,7 +1479,7 @@ def test_all_zero(self, dtype, trim): @testing.with_requires("numpy>=2.2") @pytest.mark.parametrize("dtype", 
get_all_dtypes(no_none=True)) - @pytest.mark.parametrize("trim", ["F", "B", "fb"]) + @pytest.mark.parametrize("trim", ALL_TRIMS) @pytest.mark.parametrize("ndim", [0, 1, 2, 3]) def test_all_zero_nd(self, dtype, trim, ndim): a = numpy.zeros((3,) * ndim, dtype=dtype) @@ -1496,6 +1498,60 @@ def test_size_zero(self): expected = numpy.trim_zeros(a) assert_array_equal(result, expected) + @testing.with_requires("numpy>=2.4") + @pytest.mark.parametrize( + "shape, axis", + [ + [(5,), None], + [(5,), ()], + [(5,), 0], + [(5, 6), None], + [(5, 6), ()], + [(5, 6), 0], + [(5, 6), (-1,)], + [(5, 6, 7), None], + [(5, 6, 7), ()], + [(5, 6, 7), 1], + [(5, 6, 7), (0, 2)], + [(5, 6, 7, 8), None], + [(5, 6, 7, 8), ()], + [(5, 6, 7, 8), -2], + [(5, 6, 7, 8), (0, 1, 3)], + ], + ) + @pytest.mark.parametrize("trim", ALL_TRIMS) + def test_multiple_axes(self, shape, axis, trim): + # standardize axis to a tuple + if axis is None: + axis = tuple(range(len(shape))) + elif isinstance(axis, int): + axis = (len(shape) + axis if axis < 0 else axis,) + else: + axis = tuple(len(shape) + ax if ax < 0 else ax for ax in axis) + + # populate a random interior slice with nonzero entries + rng = numpy.random.default_rng(4321) + a = numpy.zeros(shape) + start = rng.integers(low=0, high=numpy.array(shape) - 1) + end = rng.integers(low=start + 1, high=shape) + shape = tuple(end - start) + data = 1 + rng.random(shape) + a[tuple(slice(i, j) for i, j in zip(start, end))] = data + ia = dpnp.array(a) + + result = dpnp.trim_zeros(ia, axis=axis, trim=trim) + expected = numpy.trim_zeros(a, axis=axis, trim=trim) + assert_array_equal(result, expected) + + # NOTE: numpy behaves differently on 0-sized input array + # and returns the input array with reduced shapes + @pytest.mark.parametrize("axis", [None, -1, 0]) + @pytest.mark.parametrize("trim", ALL_TRIMS) + def test_empty_array(self, axis, trim): + a = dpnp.ones((0, 3)) + result = dpnp.trim_zeros(a, axis=axis, trim=trim) + assert result is a + 
@pytest.mark.parametrize( "a", [numpy.array([0, 2**62, 0]), numpy.array([0, 2**63, 0])] ) From 1f7f4d9f6f85fe5e36bf2020d153db1ffafb773a Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sun, 1 Feb 2026 14:33:10 +0100 Subject: [PATCH 06/18] Bump github/codeql-action from 4.31.11 to 4.32.0 (#2751) Bumps [github/codeql-action](https://github.com/github/codeql-action) from 4.31.11 to 4.32.0. --- .github/workflows/openssf-scorecard.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/openssf-scorecard.yml b/.github/workflows/openssf-scorecard.yml index a3a833ba853d..a7337e3786ac 100644 --- a/.github/workflows/openssf-scorecard.yml +++ b/.github/workflows/openssf-scorecard.yml @@ -72,6 +72,6 @@ jobs: # Upload the results to GitHub's code scanning dashboard. - name: "Upload to code-scanning" - uses: github/codeql-action/upload-sarif@19b2f06db2b6f5108140aeb04014ef02b648f789 # v4.31.11 + uses: github/codeql-action/upload-sarif@b20883b0cd1f46c72ae0ba6d1090936928f9fa30 # v4.32.0 with: sarif_file: results.sarif From f4591e1cc389a7fcadbc0ec46cfb7c03a8d38c61 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sat, 7 Feb 2026 15:05:55 +0100 Subject: [PATCH 07/18] Bump github/codeql-action from 4.32.0 to 4.32.2 (#2759) Bumps [github/codeql-action](https://github.com/github/codeql-action) from 4.32.0 to 4.32.2. --- .github/workflows/openssf-scorecard.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/openssf-scorecard.yml b/.github/workflows/openssf-scorecard.yml index a7337e3786ac..441b8806eef9 100644 --- a/.github/workflows/openssf-scorecard.yml +++ b/.github/workflows/openssf-scorecard.yml @@ -72,6 +72,6 @@ jobs: # Upload the results to GitHub's code scanning dashboard. 
- name: "Upload to code-scanning" - uses: github/codeql-action/upload-sarif@b20883b0cd1f46c72ae0ba6d1090936928f9fa30 # v4.32.0 + uses: github/codeql-action/upload-sarif@45cbd0c69e560cd9e7cd7f8c32362050c9b7ded2 # v4.32.2 with: sarif_file: results.sarif From 2f796e09dfe25cff878d37abefb1457abd0de08f Mon Sep 17 00:00:00 2001 From: Anton <100830759+antonwolfy@users.noreply.github.com> Date: Mon, 9 Feb 2026 14:01:38 +0100 Subject: [PATCH 08/18] Align strides with numpy (#2747) The PR changes implementation of `strides` property in `dpnp.ndarray` to align with NumPy and CuPy and to return bytes displacement in memory (previously and in dpctl it returns elements displacement). --- CHANGELOG.md | 1 + dpnp/dpnp_array.py | 87 ++++++++++++++----- dpnp/dpnp_iface_arraycreation.py | 6 +- dpnp/dpnp_iface_indexing.py | 4 +- dpnp/dpnp_utils/dpnp_utils_linearalgebra.py | 2 +- dpnp/fft/dpnp_utils_fft.py | 14 ++- dpnp/linalg/dpnp_utils_linalg.py | 14 +-- dpnp/scipy/linalg/_utils.py | 3 +- dpnp/tests/test_arraycreation.py | 4 +- dpnp/tests/test_ndarray.py | 8 +- .../cupy/core_tests/test_elementwise.py | 6 +- .../cupy/core_tests/test_ndarray.py | 33 +++---- .../core_tests/test_ndarray_copy_and_view.py | 44 ++++++---- .../cupy/creation_tests/test_basic.py | 53 ++++++++--- .../cupy/creation_tests/test_from_data.py | 2 +- .../cupy/manipulation_tests/test_kind.py | 14 +-- .../cupy/manipulation_tests/test_shape.py | 8 +- dpnp/tests/third_party/cupy/testing/_array.py | 11 +-- 18 files changed, 202 insertions(+), 112 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4a94d41d131c..61d0e271da9a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -47,6 +47,7 @@ Also, that release drops support for Python 3.9, making Python 3.10 the minimum * Clarified behavior on repeated `axes` in `dpnp.tensordot` and `dpnp.linalg.tensordot` functions [#2733](https://github.com/IntelPython/dpnp/pull/2733) * Improved documentation of `file` argument in `dpnp.fromfile` 
[#2745](https://github.com/IntelPython/dpnp/pull/2745) * Aligned `dpnp.trim_zeros` with NumPy 2.4 to support a tuple of integers passed with `axis` keyword [#2746](https://github.com/IntelPython/dpnp/pull/2746) +* Aligned `strides` property of `dpnp.ndarray` with NumPy and CuPy implementations [#2747](https://github.com/IntelPython/dpnp/pull/2747) ### Deprecated diff --git a/dpnp/dpnp_array.py b/dpnp/dpnp_array.py index 6a2b2fd1977f..f37a3a2b3be3 100644 --- a/dpnp/dpnp_array.py +++ b/dpnp/dpnp_array.py @@ -105,6 +105,16 @@ def __init__( else: buffer = usm_type + if strides is not None: + # dpctl expects strides as elements displacement in memory, + # while dpnp (and numpy as well) relies on bytes displacement + if dtype is None: + dtype = dpnp.default_float_type( + device=device, sycl_queue=sycl_queue + ) + it_sz = dpnp.dtype(dtype).itemsize + strides = tuple(el // it_sz for el in strides) + sycl_queue_normalized = dpnp.get_normalized_queue_device( device=device, sycl_queue=sycl_queue ) @@ -1855,16 +1865,53 @@ def std( @property def strides(self): """ - Return memory displacement in array elements, upon unit - change of respective index. + Tuple of bytes to step in each dimension when traversing an array. - For example, for strides ``(s1, s2, s3)`` and multi-index - ``(i1, i2, i3)`` position of the respective element relative - to zero multi-index element is ``s1*s1 + s2*i2 + s3*i3``. + The byte offset of element ``(i[0], i[1], ..., i[n])`` in an array `a` + is:: - """ + offset = sum(dpnp.array(i) * a.strides) - return self._array_obj.strides + For full documentation refer to :obj:`numpy.ndarray.strides`. + + See Also + -------- + :obj:`dpnp.lib.stride_tricks.as_strided` : Return a view into the array + with given shape and strides. 
+ + Examples + -------- + >>> import dpnp as np + >>> y = np.reshape(np.arange(2 * 3 * 4, dtype=np.int32), (2, 3, 4)) + >>> y + array([[[ 0, 1, 2, 3], + [ 4, 5, 6, 7], + [ 8, 9, 10, 11]], + [[12, 13, 14, 15], + [16, 17, 18, 19], + [20, 21, 22, 23]]], dtype=np.int32) + >>> y.strides + (48, 16, 4) + >>> y[1, 1, 1] + array(17, dtype=int32) + >>> offset = sum(i * s for i, s in zip((1, 1, 1), y.strides)) + >>> offset // y.itemsize + 17 + + >>> x = np.reshape(np.arange(5*6*7*8, dtype=np.int32), (5, 6, 7, 8)) + >>> x = x.transpose(2, 3, 1, 0) + >>> x.strides + (32, 4, 224, 1344) + >>> offset = sum(i * s for i, s in zip((3, 5, 2, 2), x.strides)) + >>> x[3, 5, 2, 2] + array(813, dtype=int32) + >>> offset // x.itemsize + 813 + + """ + + it_sz = self.itemsize + return tuple(el * it_sz for el in self._array_obj.strides) def sum( self, @@ -2335,23 +2382,20 @@ def view(self, /, dtype=None, *, type=None): # resize on last axis only axis = ndim - 1 - if old_sh[axis] != 1 and self.size != 0 and old_strides[axis] != 1: + if ( + old_sh[axis] != 1 + and self.size != 0 + and old_strides[axis] != old_itemsz + ): raise ValueError( "To change to a dtype of a different size, " "the last axis must be contiguous" ) # normalize strides whenever itemsize changes - if old_itemsz > new_itemsz: - new_strides = list( - el * (old_itemsz // new_itemsz) for el in old_strides - ) - else: - new_strides = list( - el // (new_itemsz // old_itemsz) for el in old_strides - ) - new_strides[axis] = 1 - new_strides = tuple(new_strides) + new_strides = tuple( + old_strides[i] if i != axis else new_itemsz for i in range(ndim) + ) new_dim = old_sh[axis] * old_itemsz if new_dim % new_itemsz != 0: @@ -2361,9 +2405,10 @@ def view(self, /, dtype=None, *, type=None): ) # normalize shape whenever itemsize changes - new_sh = list(old_sh) - new_sh[axis] = new_dim // new_itemsz - new_sh = tuple(new_sh) + new_sh = tuple( + old_sh[i] if i != axis else new_dim // new_itemsz + for i in range(ndim) + ) return dpnp_array( 
new_sh, diff --git a/dpnp/dpnp_iface_arraycreation.py b/dpnp/dpnp_iface_arraycreation.py index 539df84d3638..e7b902647186 100644 --- a/dpnp/dpnp_iface_arraycreation.py +++ b/dpnp/dpnp_iface_arraycreation.py @@ -105,7 +105,7 @@ def _get_empty_array( elif a.flags.c_contiguous: order = "C" else: - strides = _get_strides_for_order_k(a, _shape) + strides = _get_strides_for_order_k(a, _dtype, shape=_shape) order = "C" elif order not in "cfCF": raise ValueError( @@ -122,7 +122,7 @@ def _get_empty_array( ) -def _get_strides_for_order_k(x, shape=None): +def _get_strides_for_order_k(x, dtype, shape=None): """ Calculate strides when order='K' for empty_like, ones_like, zeros_like, and full_like where `shape` is ``None`` or len(shape) == x.ndim. @@ -130,7 +130,7 @@ def _get_strides_for_order_k(x, shape=None): """ stride_and_index = sorted([(abs(s), -i) for i, s in enumerate(x.strides)]) strides = [0] * x.ndim - stride = 1 + stride = dpnp.dtype(dtype).itemsize for _, i in stride_and_index: strides[-i] = stride stride *= shape[-i] if shape else x.shape[-i] diff --git a/dpnp/dpnp_iface_indexing.py b/dpnp/dpnp_iface_indexing.py index 6e7ab778299b..7718412701e8 100644 --- a/dpnp/dpnp_iface_indexing.py +++ b/dpnp/dpnp_iface_indexing.py @@ -731,10 +731,10 @@ def diagonal(a, offset=0, axis1=0, axis2=1): elif 0 < offset < m: out_shape = a_shape[:-2] + (min(n, m - offset),) out_strides = a_straides[:-2] + (st_n + st_m,) - out_offset = st_m * offset + out_offset = st_m // a.itemsize * offset else: out_shape = a_shape[:-2] + (0,) - out_strides = a_straides[:-2] + (1,) + out_strides = a_straides[:-2] + (a.itemsize,) out_offset = 0 return dpnp_array( diff --git a/dpnp/dpnp_utils/dpnp_utils_linearalgebra.py b/dpnp/dpnp_utils/dpnp_utils_linearalgebra.py index 30be5d1ff5cb..191b8aa65d13 100644 --- a/dpnp/dpnp_utils/dpnp_utils_linearalgebra.py +++ b/dpnp/dpnp_utils/dpnp_utils_linearalgebra.py @@ -185,7 +185,7 @@ def _define_contig_flag(x): """ flag = False - x_strides = x.strides + x_strides = 
dpnp.get_usm_ndarray(x).strides x_shape = x.shape if x.ndim < 2: return True, True, True diff --git a/dpnp/fft/dpnp_utils_fft.py b/dpnp/fft/dpnp_utils_fft.py index 4e2b7aaaf842..709494e6255e 100644 --- a/dpnp/fft/dpnp_utils_fft.py +++ b/dpnp/fft/dpnp_utils_fft.py @@ -193,12 +193,13 @@ def _compute_result(dsc, a, out, forward, c2c, out_strides): ) result = a else: + out_usm = None if out is None else dpnp.get_usm_ndarray(out) if ( out is not None - and out.strides == tuple(out_strides) - and not ti._array_overlap(a_usm, dpnp.get_usm_ndarray(out)) + and out_usm.strides == tuple(out_strides) + and not ti._array_overlap(a_usm, out_usm) ): - res_usm = dpnp.get_usm_ndarray(out) + res_usm = out_usm result = out else: # Result array that is used in oneMKL must have the exact same @@ -223,6 +224,10 @@ def _compute_result(dsc, a, out, forward, c2c, out_strides): if a.dtype == dpnp.complex64 else dpnp.float64 ) + # cast to expected strides format + out_strides = tuple( + el * dpnp.dtype(out_dtype).itemsize for el in out_strides + ) result = dpnp_array( out_shape, dtype=out_dtype, @@ -419,7 +424,8 @@ def _fft(a, norm, out, forward, in_place, c2c, axes, batch_fft=True): if cufft_wa: # pragma: no cover a = dpnp.moveaxis(a, -1, -2) - a_strides = _standardize_strides_to_nonzero(a.strides, a.shape) + strides = dpnp.get_usm_ndarray(a).strides + a_strides = _standardize_strides_to_nonzero(strides, a.shape) dsc, out_strides = _commit_descriptor( a, forward, in_place, c2c, a_strides, index, batch_fft ) diff --git a/dpnp/linalg/dpnp_utils_linalg.py b/dpnp/linalg/dpnp_utils_linalg.py index 196cd2ae9da5..6881c7787e9f 100644 --- a/dpnp/linalg/dpnp_utils_linalg.py +++ b/dpnp/linalg/dpnp_utils_linalg.py @@ -215,7 +215,7 @@ def _batched_inv(a, res_type): _manager.add_event_pair(ht_ev, copy_ev) ipiv_stride = n - a_stride = a_h.strides[0] + a_stride = a_h.strides[0] // a_h.itemsize # Call the LAPACK extension function _getrf_batch # to perform LU decomposition of a batch of general matrices @@ 
-298,7 +298,7 @@ def _batched_lu_factor(a, res_type): dev_info_h = [0] * batch_size ipiv_stride = n - a_stride = a_h.strides[0] + a_stride = a_h.strides[0] // a_h.itemsize # Call the LAPACK extension function _getrf_batch # to perform LU decomposition of a batch of general matrices @@ -471,8 +471,8 @@ def _batched_qr(a, mode="reduced"): dtype=res_type, ) - a_stride = a_t.strides[0] - tau_stride = tau_h.strides[0] + a_stride = a_t.strides[0] // a_t.itemsize + tau_stride = tau_h.strides[0] // tau_h.itemsize # Call the LAPACK extension function _geqrf_batch to compute # the QR factorization of a general m x n matrix. @@ -535,8 +535,8 @@ def _batched_qr(a, mode="reduced"): ) _manager.add_event_pair(ht_ev, copy_ev) - q_stride = q.strides[0] - tau_stride = tau_h.strides[0] + q_stride = q.strides[0] // q.itemsize + tau_stride = tau_h.strides[0] // tau_h.itemsize # Get LAPACK function (_orgqr_batch for real or _ungqf_batch for complex # data types) for QR factorization @@ -1818,7 +1818,7 @@ def dpnp_cholesky_batch(a, upper_lower, res_type): ) _manager.add_event_pair(ht_ev, copy_ev) - a_stride = a_h.strides[0] + a_stride = a_h.strides[0] // a_h.itemsize # Call the LAPACK extension function _potrf_batch # to computes the Cholesky decomposition of a batch of diff --git a/dpnp/scipy/linalg/_utils.py b/dpnp/scipy/linalg/_utils.py index 282c645d1095..f00db6fdfb92 100644 --- a/dpnp/scipy/linalg/_utils.py +++ b/dpnp/scipy/linalg/_utils.py @@ -37,6 +37,7 @@ """ +# pylint: disable=duplicate-code # pylint: disable=no-name-in-module # pylint: disable=protected-access @@ -144,7 +145,7 @@ def _batched_lu_factor_scipy(a, res_type): # pylint: disable=too-many-locals dev_info_h = [0] * batch_size ipiv_stride = k - a_stride = a_h.strides[-1] + a_stride = a_h.strides[-1] // a_h.itemsize # Call the LAPACK extension function _getrf_batch # to perform LU decomposition of a batch of general matrices diff --git a/dpnp/tests/test_arraycreation.py b/dpnp/tests/test_arraycreation.py index 
eb20f9b3ffe5..d8a80ddbff78 100644 --- a/dpnp/tests/test_arraycreation.py +++ b/dpnp/tests/test_arraycreation.py @@ -861,12 +861,12 @@ def test_full_order(order1, order2): def test_full_strides(): a = numpy.full((3, 3), numpy.arange(3, dtype="i4")) ia = dpnp.full((3, 3), dpnp.arange(3, dtype="i4")) - assert ia.strides == tuple(el // a.itemsize for el in a.strides) + assert ia.strides == a.strides assert_array_equal(ia, a) a = numpy.full((3, 3), numpy.arange(6, dtype="i4")[::2]) ia = dpnp.full((3, 3), dpnp.arange(6, dtype="i4")[::2]) - assert ia.strides == tuple(el // a.itemsize for el in a.strides) + assert ia.strides == a.strides assert_array_equal(ia, a) diff --git a/dpnp/tests/test_ndarray.py b/dpnp/tests/test_ndarray.py index c58c26fdf977..4e4e42bbc85e 100644 --- a/dpnp/tests/test_ndarray.py +++ b/dpnp/tests/test_ndarray.py @@ -60,10 +60,10 @@ def test_attributes(self): assert_equal(self.three.shape, (10, 3, 2)) self.three.shape = (2, 5, 6) - assert_equal(self.one.strides, (self.one.itemsize / self.one.itemsize,)) - num = self.two.itemsize / self.two.itemsize + assert_equal(self.one.strides, (self.one.itemsize,)) + num = self.two.itemsize assert_equal(self.two.strides, (5 * num, num)) - num = self.three.itemsize / self.three.itemsize + num = self.three.itemsize assert_equal(self.three.strides, (30 * num, 6 * num, num)) assert_equal(self.one.ndim, 1) @@ -290,7 +290,7 @@ def test_flags_strides(dtype, order, strides): (4, 4), dtype=dtype, order=order, strides=strides ) a = numpy.ndarray((4, 4), dtype=dtype, order=order, strides=numpy_strides) - ia = dpnp.ndarray((4, 4), dtype=dtype, order=order, strides=strides) + ia = dpnp.ndarray((4, 4), dtype=dtype, order=order, strides=numpy_strides) assert usm_array.flags == ia.flags assert a.flags.c_contiguous == ia.flags.c_contiguous assert a.flags.f_contiguous == ia.flags.f_contiguous diff --git a/dpnp/tests/third_party/cupy/core_tests/test_elementwise.py b/dpnp/tests/third_party/cupy/core_tests/test_elementwise.py index 
b2d6e65cd37a..a024dd59d702 100644 --- a/dpnp/tests/third_party/cupy/core_tests/test_elementwise.py +++ b/dpnp/tests/third_party/cupy/core_tests/test_elementwise.py @@ -6,8 +6,6 @@ import dpnp as cupy from dpnp.tests.helper import ( has_support_aspect64, - is_win_platform, - numpy_version, ) from dpnp.tests.third_party.cupy import testing @@ -67,10 +65,10 @@ def test_copy_orders(self, order): a = cupy.empty((2, 3, 4)) b = cupy.copy(a, order) - a_cpu = numpy.empty((2, 3, 4)) + a_cpu = numpy.empty((2, 3, 4), dtype=a.dtype) b_cpu = numpy.copy(a_cpu, order) - assert b.strides == tuple(x / b_cpu.itemsize for x in b_cpu.strides) + assert b.strides == b_cpu.strides @pytest.mark.skip("`ElementwiseKernel` isn't supported") diff --git a/dpnp/tests/third_party/cupy/core_tests/test_ndarray.py b/dpnp/tests/third_party/cupy/core_tests/test_ndarray.py index d782eb9f41ec..95d753c90473 100644 --- a/dpnp/tests/third_party/cupy/core_tests/test_ndarray.py +++ b/dpnp/tests/third_party/cupy/core_tests/test_ndarray.py @@ -8,9 +8,19 @@ import pytest from dpctl.tensor._numpy_helper import AxisError +# from cupy_backends.cuda.api import driver +# from cupy_backends.cuda.api import runtime +# from cupy_backends.cuda import stream as stream_module import dpnp as cupy + +# from cupy import _util +# from cupy import _core +# from cupy import cuda +# from cupy import get_array_module from dpnp.tests.third_party.cupy import testing +# from cupy.exceptions import AxisError + def get_array_module(*args): for arg in args: @@ -67,8 +77,8 @@ def test_memptr_with_strides(self): memptr = buf.data # self-overlapping strides - a = cupy.ndarray((2, 3), numpy.float32, memptr, strides=(2, 1)) - assert a.strides == (2, 1) + a = cupy.ndarray((2, 3), numpy.float32, memptr, strides=(8, 4)) + assert a.strides == (8, 4) a[:] = 1 a[0, 2] = 4 @@ -85,23 +95,21 @@ def test_strides_without_memptr(self): def test_strides_is_given_and_order_is_ignored(self): buf = cupy.ndarray(20, numpy.uint8) a = cupy.ndarray( - (2, 3), 
numpy.float32, buf.data, strides=(2, 1), order="C" + (2, 3), numpy.float32, buf.data, strides=(8, 4), order="C" ) - assert a.strides == (2, 1) + assert a.strides == (8, 4) @testing.with_requires("numpy>=1.19") def test_strides_is_given_but_order_is_invalid(self): for xp in (numpy, cupy): with pytest.raises(ValueError): - xp.ndarray((2, 3), numpy.float32, strides=(2, 1), order="!") + xp.ndarray((2, 3), numpy.float32, strides=(8, 4), order="!") def test_order(self): shape = (2, 3, 4) a = cupy.ndarray(shape, order="F") a_cpu = numpy.ndarray(shape, order="F", dtype=a.dtype) - assert all( - i * a.itemsize == j for i, j in zip(a.strides, a_cpu.strides) - ) + assert a.strides == a_cpu.strides assert a.flags.f_contiguous assert not a.flags.c_contiguous @@ -111,9 +119,7 @@ def test_order_none(self): a_cpu = numpy.ndarray(shape, order=None, dtype=a.dtype) assert a.flags.c_contiguous == a_cpu.flags.c_contiguous assert a.flags.f_contiguous == a_cpu.flags.f_contiguous - assert all( - i * a.itemsize == j for i, j in zip(a.strides, a_cpu.strides) - ) + assert a.strides == a_cpu.strides def test_slots(self): # Test for #7883. 
@@ -147,10 +153,7 @@ class TestNdarrayInitStrides(unittest.TestCase): @testing.numpy_cupy_equal() def test_strides(self, xp): arr = xp.ndarray(self.shape, dtype=self.dtype, order=self.order) - strides = arr.strides - if xp is cupy: - strides = tuple(i * arr.itemsize for i in strides) - return (strides, arr.flags.c_contiguous, arr.flags.f_contiguous) + return (arr.strides, arr.flags.c_contiguous, arr.flags.f_contiguous) class TestNdarrayInitRaise(unittest.TestCase): diff --git a/dpnp/tests/third_party/cupy/core_tests/test_ndarray_copy_and_view.py b/dpnp/tests/third_party/cupy/core_tests/test_ndarray_copy_and_view.py index 61980b6eda9b..7b503f1997a5 100644 --- a/dpnp/tests/third_party/cupy/core_tests/test_ndarray_copy_and_view.py +++ b/dpnp/tests/third_party/cupy/core_tests/test_ndarray_copy_and_view.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import numpy import pytest @@ -8,6 +10,7 @@ from numpy.exceptions import ComplexWarning else: from numpy import ComplexWarning +# from cupy import _util def astype_without_warning(x, dtype, *args, **kwargs): @@ -19,12 +22,6 @@ def astype_without_warning(x, dtype, *args, **kwargs): return x.astype(dtype, *args, **kwargs) -def get_strides(xp, a): - if xp is numpy: - return tuple(el // a.itemsize for el in a.strides) - return a.strides - - class TestView: @testing.numpy_cupy_array_equal() @@ -189,6 +186,9 @@ def test_view_larger_dtype_zero_sized(self, xp): class TestArrayCopy: + # @pytest.mark.skipif( + # not _util.ENABLE_SLICE_COPY, reason="Special copy disabled" + # ) @testing.for_orders("CF") @testing.for_dtypes( [numpy.int16, numpy.int64, numpy.float16, numpy.float64] @@ -200,13 +200,19 @@ def test_isinstance_numpy_copy(self, xp, dtype, order): b[:] = a return b + # @pytest.mark.skipif( + # not _util.ENABLE_SLICE_COPY, reason="Special copy disabled" + # ) @pytest.mark.skip("copy from host to device is allowed") def test_isinstance_numpy_copy_wrong_dtype(self): - a = numpy.arange(100, 
dtype=cupy.default_float_type()).reshape(10, 10) + a = numpy.arange(100, dtype=numpy.float64).reshape(10, 10) b = cupy.empty(a.shape, dtype=numpy.int32) with pytest.raises(ValueError): b[:] = a + # @pytest.mark.skipif( + # not _util.ENABLE_SLICE_COPY, reason="Special copy disabled" + # ) def test_isinstance_numpy_copy_wrong_shape(self): for xp in (numpy, cupy): a = numpy.arange(100, dtype=cupy.default_float_type()).reshape( @@ -216,12 +222,18 @@ def test_isinstance_numpy_copy_wrong_shape(self): with pytest.raises(ValueError): b[:] = a + # @pytest.mark.skipif( + # not _util.ENABLE_SLICE_COPY, reason="Special copy disabled" + # ) @testing.numpy_cupy_array_equal() def test_isinstance_numpy_copy_not_slice(self, xp): a = xp.arange(5, dtype=cupy.default_float_type()) a[a < 3] = 0 return a + # @pytest.mark.skipif( + # not _util.ENABLE_SLICE_COPY, reason="Special copy disabled" + # ) @pytest.mark.skip("copy from host to device is allowed") def test_copy_host_to_device_view(self): dev = cupy.empty((10, 10), dtype=numpy.float32)[2:5, 1:8] @@ -358,24 +370,24 @@ def test_astype_type_f_contiguous_no_copy(self, dtype, order): @testing.numpy_cupy_equal() def test_astype_strides(self, xp, src_dtype, dst_dtype): src = testing.shaped_arange((1, 2, 3), xp, dtype=src_dtype) - dst = astype_without_warning(src, dst_dtype, order="K") - return get_strides(xp, dst) + return astype_without_warning(src, dst_dtype, order="K").strides @testing.for_all_dtypes_combination(("src_dtype", "dst_dtype")) @testing.numpy_cupy_equal() def test_astype_strides_negative(self, xp, src_dtype, dst_dtype): src = testing.shaped_arange((2, 3), xp, dtype=src_dtype) src = src[::-1, :] - dst = astype_without_warning(src, dst_dtype, order="K") - return tuple(abs(x) for x in get_strides(xp, dst)) + return tuple( + abs(el) + for el in astype_without_warning(src, dst_dtype, order="K").strides + ) @testing.for_all_dtypes_combination(("src_dtype", "dst_dtype")) @testing.numpy_cupy_equal() def 
test_astype_strides_swapped(self, xp, src_dtype, dst_dtype): src = testing.shaped_arange((2, 3, 4), xp, dtype=src_dtype) src = xp.swapaxes(src, 1, 0) - dst = astype_without_warning(src, dst_dtype, order="K") - return get_strides(xp, dst) + return astype_without_warning(src, dst_dtype, order="K").strides @testing.for_all_dtypes_combination(("src_dtype", "dst_dtype")) @testing.numpy_cupy_equal() @@ -383,8 +395,7 @@ def test_astype_strides_broadcast(self, xp, src_dtype, dst_dtype): src1 = testing.shaped_arange((2, 3, 2), xp, dtype=src_dtype) src2 = testing.shaped_arange((2,), xp, dtype=src_dtype) src, _ = xp.broadcast_arrays(src1, src2) - dst = astype_without_warning(src, dst_dtype, order="K") - return get_strides(xp, dst) + return astype_without_warning(src, dst_dtype, order="K").strides @testing.numpy_cupy_array_equal() def test_astype_boolean_view(self, xp): @@ -413,6 +424,9 @@ def test_diagonal2(self, xp, dtype): {"src_order": "F"}, ) class TestNumPyArrayCopyView: + # @pytest.mark.skipif( + # not _util.ENABLE_SLICE_COPY, reason="Special copy disabled" + # ) @testing.for_orders("CF") @testing.for_dtypes( [numpy.int16, numpy.int64, numpy.float16, numpy.float64] diff --git a/dpnp/tests/third_party/cupy/creation_tests/test_basic.py b/dpnp/tests/third_party/cupy/creation_tests/test_basic.py index 8265671ab350..a9e382d22798 100644 --- a/dpnp/tests/third_party/cupy/creation_tests/test_basic.py +++ b/dpnp/tests/third_party/cupy/creation_tests/test_basic.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import warnings import numpy @@ -152,10 +154,7 @@ def test_empty_like_K_strides(self, dtype): bg.fill(0) # make sure NumPy and CuPy strides agree - scaled_numpy_strides = b.strides - scale = b.itemsize - numpy_strides = tuple(i / scale for i in scaled_numpy_strides) - assert numpy_strides == bg.strides + assert b.strides == bg.strides return @testing.with_requires("numpy>=1.19") @@ -171,7 +170,7 @@ def test_empty_like_subok(self): with 
pytest.raises(NotImplementedError): cupy.empty_like(a, subok=True) - @pytest.mark.skip("strides for zero sized array is different") + @pytest.mark.skip("strides for zero sized array are different") @testing.for_CF_orders() @testing.with_requires("numpy>=1.23") def test_empty_zero_sized_array_strides(self, order): @@ -221,8 +220,7 @@ def test_zeros_int(self, xp, dtype, order): def test_zeros_strides(self, order): a = numpy.zeros((2, 3), dtype=cupy.default_float_type(), order=order) b = cupy.zeros((2, 3), dtype=cupy.default_float_type(), order=order) - b_strides = tuple(x * b.itemsize for x in b.strides) - assert b_strides == a.strides + assert b.strides == a.strides @testing.for_orders("CFAK") @testing.for_all_dtypes() @@ -236,6 +234,15 @@ def test_zeros_like_subok(self): with pytest.raises(NotImplementedError): cupy.zeros_like(a, subok=True) + @pytest.mark.skip("only native byteorder is supported") + def test_reject_byteswap(self): + # Reject creation of arrays with bad byte-order at a low level + with pytest.raises(ValueError, match=".*byte-order"): + cupy.ndarray((2, 3, 4), dtype=">i") + + with pytest.raises(ValueError, match=".*byte-order"): + cupy.zeros((2, 3, 4), dtype=">i") + @testing.for_CF_orders() @testing.for_all_dtypes() @testing.numpy_cupy_array_equal() @@ -254,6 +261,33 @@ def test_ones_like_subok(self): with pytest.raises(NotImplementedError): cupy.ones_like(a, subok=True) + @pytest.mark.parametrize( + "shape, strides", + [ + ((2, 3, 4), (8 * 3 * 4, 8 * 4, 8)), # contiguous + ((2, 3, 4), (8, 0, 8)), # smaller than contiguous needed + ((2, 0, 4), (8, 128, 1024)), # empty can be OK + ], + ) + def test_ndarray_strides(self, shape, strides): + a = cupy.ndarray( + shape, strides=strides, dtype=cupy.default_float_type() + ) + assert cupy.byte_bounds(a)[0] == a.data.ptr + assert cupy.byte_bounds(a)[1] - a.data.ptr <= a.data.size + + @pytest.mark.skip("due to dpctl-2239") + @pytest.mark.parametrize( + "shape, strides", + [ + ((2, 3, 4), (8, 128, 1024)), # too 
large + ((2, 3, 4), (-8, 8, 8)), # negative (needs offset) + ], + ) + def test_ndarray_strides_raises(self, shape, strides): + with pytest.raises(ValueError, match=r"ndarray\(\) with strides.*"): + cupy.ndarray(shape, strides=strides) + @testing.for_CF_orders() @testing.for_all_dtypes() @testing.numpy_cupy_array_equal() @@ -457,10 +491,7 @@ def test_empty_like_K_strides_reshape(self, dtype): bg.fill(0) # make sure NumPy and CuPy strides agree - scaled_numpy_strides = b.strides - scale = b.itemsize - numpy_strides = tuple(i / scale for i in scaled_numpy_strides) - assert numpy_strides == bg.strides + assert b.strides == bg.strides return @testing.with_requires("numpy>=1.17.0") diff --git a/dpnp/tests/third_party/cupy/creation_tests/test_from_data.py b/dpnp/tests/third_party/cupy/creation_tests/test_from_data.py index 47505c6c00df..a2496c855b02 100644 --- a/dpnp/tests/third_party/cupy/creation_tests/test_from_data.py +++ b/dpnp/tests/third_party/cupy/creation_tests/test_from_data.py @@ -131,7 +131,7 @@ def test_array_from_nested_list_of_numpy( @testing.for_orders("CFAK", name="dst_order") @testing.for_all_dtypes_combination(names=("dtype1", "dtype2")) @testing.numpy_cupy_array_equal( - type_check=has_support_aspect64(), strides_check=True + type_check=has_support_aspect64(), strides_check=has_support_aspect64() ) def test_array_from_list_of_cupy( self, xp, dtype1, dtype2, src_order, dst_order diff --git a/dpnp/tests/third_party/cupy/manipulation_tests/test_kind.py b/dpnp/tests/third_party/cupy/manipulation_tests/test_kind.py index b327b91a5616..e0bc9c7eb49e 100644 --- a/dpnp/tests/third_party/cupy/manipulation_tests/test_kind.py +++ b/dpnp/tests/third_party/cupy/manipulation_tests/test_kind.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import unittest import numpy @@ -35,8 +37,6 @@ def func(xp): ret = xp.asfortranarray(x) assert x.flags.c_contiguous assert ret.flags.f_contiguous - if xp is cupy: - return tuple(el * ret.itemsize for el in ret.strides) 
return ret.strides assert func(numpy) == func(cupy) @@ -48,8 +48,6 @@ def func(xp): ret = xp.asfortranarray(x) assert x.flags.c_contiguous assert ret.flags.f_contiguous - if xp is cupy: - return tuple(el * ret.itemsize for el in ret.strides) return ret.strides assert func(numpy) == func(cupy) @@ -61,8 +59,6 @@ def func(xp): ret = xp.asfortranarray(xp.asfortranarray(x)) assert x.flags.c_contiguous assert ret.flags.f_contiguous - if xp is cupy: - return tuple(el * ret.itemsize for el in ret.strides) return ret.strides assert func(numpy) == func(cupy) @@ -74,8 +70,6 @@ def func(xp): x = xp.transpose(x, (1, 0)) ret = xp.asfortranarray(x) assert ret.flags.f_contiguous - if xp is cupy: - return tuple(el * ret.itemsize for el in ret.strides) return ret.strides assert func(numpy) == func(cupy) @@ -87,8 +81,6 @@ def func(xp): ret = xp.asfortranarray(x) assert x.flags.c_contiguous assert ret.flags.f_contiguous - if xp is cupy: - return tuple(el * ret.itemsize for el in ret.strides) return ret.strides assert func(numpy) == func(cupy) @@ -106,7 +98,7 @@ def test_require_flag_check(self, dtype): @pytest.mark.skip("dpnp.require() does not support requirement ['O']") @testing.for_all_dtypes() def test_require_owndata(self, dtype): - x = cupy.zeros((2, 3, 4), dtype=dtype) + x = cupy.zeros((2, 3, 4), dtype) arr = x.view() arr = cupy.require(arr, dtype, ["O"]) assert arr.flags["OWNDATA"] diff --git a/dpnp/tests/third_party/cupy/manipulation_tests/test_shape.py b/dpnp/tests/third_party/cupy/manipulation_tests/test_shape.py index bec0215d4b64..0520fbbc0ff9 100644 --- a/dpnp/tests/third_party/cupy/manipulation_tests/test_shape.py +++ b/dpnp/tests/third_party/cupy/manipulation_tests/test_shape.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import numpy import pytest @@ -25,8 +27,6 @@ class TestReshape: def test_reshape_strides(self): def func(xp): a = testing.shaped_arange((1, 1, 1, 2, 2), xp) - if xp is cupy: - return tuple(el * a.itemsize for el in a.strides) return 
a.strides assert func(numpy) == func(cupy) @@ -259,5 +259,7 @@ def test_reshape_contiguity(self, order_init, order_reshape, shape_in_out): assert b_cupy.flags.f_contiguous == b_numpy.flags.f_contiguous assert b_cupy.flags.c_contiguous == b_numpy.flags.c_contiguous - # testing.assert_array_equal(b_cupy.strides, b_numpy.strides) + if shape_final != (1, 6, 1): + # strides mismatching is allowed due to multiple representation + testing.assert_array_equal(b_cupy.strides, b_numpy.strides) testing.assert_array_equal(b_cupy, b_numpy) diff --git a/dpnp/tests/third_party/cupy/testing/_array.py b/dpnp/tests/third_party/cupy/testing/_array.py index 552dc19f456f..8c88cacb2ff7 100644 --- a/dpnp/tests/third_party/cupy/testing/_array.py +++ b/dpnp/tests/third_party/cupy/testing/_array.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import warnings import numpy @@ -171,18 +173,13 @@ def assert_array_equal( ) if strides_check: - strides = desired.strides - if isinstance(actual, cupy.ndarray): - # need to agreed the strides with numpy.ndarray - strides = tuple(el // desired.itemsize for el in desired.strides) - - if actual.strides != strides: + if actual.strides != desired.strides: msg = ["Strides are not equal:"] if err_msg: msg = [msg[0] + " " + err_msg] if verbose: msg.append(" x: {}".format(actual.strides)) - msg.append(" y: {}".format(strides)) + msg.append(" y: {}".format(desired.strides)) raise AssertionError("\n".join(msg)) From 06a74cd12dd8962d316e24817c0969ed9e900d8d Mon Sep 17 00:00:00 2001 From: Anton <100830759+antonwolfy@users.noreply.github.com> Date: Mon, 9 Feb 2026 16:37:33 +0100 Subject: [PATCH 09/18] Extend `nan_to_num` with broadcast support of `nan`, `posinf`, and `neginf` (#2754) The PR extends implementation of `dpnp.nan_to_num` function to align with NumPy and CuPy which supports `nan`, `posinf`, and `neginf` keywords as any array through broadcasting. This PR adds handling for a common path where at least one of the keywords has non-scalar value. 
The path does not assume a dedicated SYCL kernel, instead proposes to rely on implementation through existing python functions. That can be improved in the future if required. --- CHANGELOG.md | 1 + dpnp/dpnp_iface_mathematical.py | 112 ++++++++++++------ dpnp/tests/test_mathematical.py | 54 ++++----- .../third_party/cupy/math_tests/test_misc.py | 54 +++++++-- 4 files changed, 144 insertions(+), 77 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 61d0e271da9a..b2e11822431a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -48,6 +48,7 @@ Also, that release drops support for Python 3.9, making Python 3.10 the minimum * Improved documentation of `file` argument in `dpnp.fromfile` [#2745](https://github.com/IntelPython/dpnp/pull/2745) * Aligned `dpnp.trim_zeros` with NumPy 2.4 to support a tuple of integers passed with `axis` keyword [#2746](https://github.com/IntelPython/dpnp/pull/2746) * Aligned `strides` property of `dpnp.ndarray` with NumPy and CuPy implementations [#2747](https://github.com/IntelPython/dpnp/pull/2747) +* Extended `dpnp.nan_to_num` to support broadcasting of `nan`, `posinf`, and `neginf` keywords [#2754](https://github.com/IntelPython/dpnp/pull/2754) ### Deprecated diff --git a/dpnp/dpnp_iface_mathematical.py b/dpnp/dpnp_iface_mathematical.py index 3e6a4b0ed121..e339c24d384c 100644 --- a/dpnp/dpnp_iface_mathematical.py +++ b/dpnp/dpnp_iface_mathematical.py @@ -3646,20 +3646,24 @@ def nan_to_num(x, copy=True, nan=0.0, posinf=None, neginf=None): an array does not require a copy. Default: ``True``. - nan : {int, float, bool}, optional - Value to be used to fill ``NaN`` values. + nan : {scalar, array_like}, optional + Values to be used to fill ``NaN`` values. If no values are passed then + ``NaN`` values will be replaced with ``0.0``. + Expected to have a real-valued data type for the values. Default: ``0.0``. - posinf : {int, float, bool, None}, optional - Value to be used to fill positive infinity values. 
If no value is + posinf : {None, scalar, array_like}, optional + Values to be used to fill positive infinity values. If no values are passed then positive infinity values will be replaced with a very large number. + Expected to have a real-valued data type for the values. Default: ``None``. - neginf : {int, float, bool, None} optional - Value to be used to fill negative infinity values. If no value is + neginf : {None, scalar, array_like}, optional + Values to be used to fill negative infinity values. If no values are passed then negative infinity values will be replaced with a very small (or negative) number. + Expected to have a real-valued data type for the values. Default: ``None``. @@ -3687,6 +3691,7 @@ def nan_to_num(x, copy=True, nan=0.0, posinf=None, neginf=None): array(-1.79769313e+308) >>> np.nan_to_num(np.array(np.nan)) array(0.) + >>> x = np.array([np.inf, -np.inf, np.nan, -128, 128]) >>> np.nan_to_num(x) array([ 1.79769313e+308, -1.79769313e+308, 0.00000000e+000, @@ -3694,6 +3699,14 @@ def nan_to_num(x, copy=True, nan=0.0, posinf=None, neginf=None): >>> np.nan_to_num(x, nan=-9999, posinf=33333333, neginf=33333333) array([ 3.3333333e+07, 3.3333333e+07, -9.9990000e+03, -1.2800000e+02, 1.2800000e+02]) + + >>> nan = np.array([11, 12, -9999, 13, 14]) + >>> posinf = np.array([33333333, 11, 12, 13, 14]) + >>> neginf = np.array([11, 33333333, 12, 13, 14]) + >>> np.nan_to_num(x, nan=nan, posinf=posinf, neginf=neginf) + array([ 3.3333333e+07, 3.3333333e+07, -9.9990000e+03, -1.2800000e+02, + 1.2800000e+02]) + >>> y = np.array([complex(np.inf, np.nan), np.nan, complex(np.nan, np.inf)]) >>> np.nan_to_num(y) array([1.79769313e+308 +0.00000000e+000j, # may vary @@ -3706,33 +3719,32 @@ def nan_to_num(x, copy=True, nan=0.0, posinf=None, neginf=None): dpnp.check_supported_arrays_type(x) - # Python boolean is a subtype of an integer - # so additional check for bool is not needed. 
- if not isinstance(nan, (int, float)): - raise TypeError( - "nan must be a scalar of an integer, float, bool, " - f"but got {type(nan)}" - ) - x_type = x.dtype.type + def _check_nan_inf(val, val_dt): + # Python boolean is a subtype of an integer + if not isinstance(val, (int, float)): + val = dpnp.asarray( + val, dtype=val_dt, sycl_queue=x.sycl_queue, usm_type=x.usm_type + ) + return val - if not issubclass(x_type, dpnp.inexact): + x_type = x.dtype.type + if not dpnp.issubdtype(x_type, dpnp.inexact): return dpnp.copy(x) if copy else dpnp.get_result_array(x) max_f, min_f = _get_max_min(x.real.dtype) + + # get dtype of nan and infs values if casting required + is_complex = dpnp.issubdtype(x_type, dpnp.complexfloating) + if is_complex: + val_dt = x.real.dtype + else: + val_dt = x.dtype + + nan = _check_nan_inf(nan, val_dt) if posinf is not None: - if not isinstance(posinf, (int, float)): - raise TypeError( - "posinf must be a scalar of an integer, float, bool, " - f"or be None, but got {type(posinf)}" - ) - max_f = posinf + max_f = _check_nan_inf(posinf, val_dt) if neginf is not None: - if not isinstance(neginf, (int, float)): - raise TypeError( - "neginf must be a scalar of an integer, float, bool, " - f"or be None, but got {type(neginf)}" - ) - min_f = neginf + min_f = _check_nan_inf(neginf, val_dt) if copy: out = dpnp.empty_like(x) @@ -3741,19 +3753,45 @@ def nan_to_num(x, copy=True, nan=0.0, posinf=None, neginf=None): raise ValueError("copy is required for read-only array `x`") out = x - x_ary = dpnp.get_usm_ndarray(x) - out_ary = dpnp.get_usm_ndarray(out) + # handle a special case when nan and infs are all scalars + if all(dpnp.isscalar(el) for el in (nan, max_f, min_f)): + x_ary = dpnp.get_usm_ndarray(x) + out_ary = dpnp.get_usm_ndarray(out) + + q = x.sycl_queue + _manager = dpu.SequentialOrderManager[q] + + h_ev, comp_ev = ufi._nan_to_num( + x_ary, + nan, + max_f, + min_f, + out_ary, + q, + depends=_manager.submitted_events, + ) - q = x.sycl_queue - _manager = 
dpu.SequentialOrderManager[q] + _manager.add_event_pair(h_ev, comp_ev) - h_ev, comp_ev = ufi._nan_to_num( - x_ary, nan, max_f, min_f, out_ary, q, depends=_manager.submitted_events - ) + return dpnp.get_result_array(out) - _manager.add_event_pair(h_ev, comp_ev) - - return dpnp.get_result_array(out) + # handle a common case with broadcasting of input nan and infs + if is_complex: + parts = (x.real, x.imag) + parts_out = (out.real, out.imag) + else: + parts = (x,) + parts_out = (out,) + + for part, part_out in zip(parts, parts_out): + nan_mask = dpnp.isnan(part) + posinf_mask = dpnp.isposinf(part) + neginf_mask = dpnp.isneginf(part) + + part = dpnp.where(nan_mask, nan, part, out=part_out) + part = dpnp.where(posinf_mask, max_f, part, out=part_out) + part = dpnp.where(neginf_mask, min_f, part, out=part_out) + return out _NEGATIVE_DOCSTRING = """ diff --git a/dpnp/tests/test_mathematical.py b/dpnp/tests/test_mathematical.py index d443b71adff8..760c1a0ceb2e 100644 --- a/dpnp/tests/test_mathematical.py +++ b/dpnp/tests/test_mathematical.py @@ -1480,37 +1480,35 @@ def test_boolean_array(self): expected = numpy.nan_to_num(a) assert_allclose(result, expected) - def test_errors(self): - ia = dpnp.array([0, 1, dpnp.nan, dpnp.inf, -dpnp.inf]) - - # unsupported type `a` - a = dpnp.asnumpy(ia) - assert_raises(TypeError, dpnp.nan_to_num, a) - - # unsupported type `nan` - i_nan = dpnp.array(1) - assert_raises(TypeError, dpnp.nan_to_num, ia, nan=i_nan) + @pytest.mark.parametrize("dt", get_float_complex_dtypes()) + @pytest.mark.parametrize("kw_name", ["nan", "posinf", "neginf"]) + @pytest.mark.parametrize("val", [[1, 2, -1, -2, 7], (7.0,), numpy.array(1)]) + def test_nan_infs_array_like(self, dt, kw_name, val): + a = numpy.array([0, 1, dpnp.nan, dpnp.inf, -dpnp.inf], dtype=dt) + ia = dpnp.array(a) - # unsupported type `posinf` - i_posinf = dpnp.array(1) - assert_raises(TypeError, dpnp.nan_to_num, ia, posinf=i_posinf) + result = dpnp.nan_to_num(ia, **{kw_name: val}) + expected = 
numpy.nan_to_num(a, **{kw_name: val}) + assert_allclose(result, expected) - # unsupported type `neginf` - i_neginf = dpnp.array(1) - assert_raises(TypeError, dpnp.nan_to_num, ia, neginf=i_neginf) + @pytest.mark.parametrize("xp", [dpnp, numpy]) + @pytest.mark.parametrize("kw_name", ["nan", "posinf", "neginf"]) + def test_nan_infs_complex_dtype(self, xp, kw_name): + ia = xp.array([0, 1, xp.nan, xp.inf, -xp.inf]) + with pytest.raises(TypeError, match="complex"): + xp.nan_to_num(ia, **{kw_name: 1j}) - @pytest.mark.parametrize("kwarg", ["nan", "posinf", "neginf"]) - @pytest.mark.parametrize("value", [1 - 0j, [1, 2], (1,)]) - def test_errors_diff_types(self, kwarg, value): - ia = dpnp.array([0, 1, dpnp.nan, dpnp.inf, -dpnp.inf]) - with pytest.raises(TypeError): - dpnp.nan_to_num(ia, **{kwarg: value}) + def test_numpy_input_array(self): + a = numpy.array([0, 1, dpnp.nan, dpnp.inf, -dpnp.inf]) + with pytest.raises(TypeError, match="must be any of supported type"): + dpnp.nan_to_num(a) - def test_error_readonly(self): - a = dpnp.array([0, 1, dpnp.nan, dpnp.inf, -dpnp.inf]) - a.flags.writable = False - with pytest.raises(ValueError): - dpnp.nan_to_num(a, copy=False) + @pytest.mark.parametrize("xp", [dpnp, numpy]) + def test_error_readonly(self, xp): + a = xp.array([0, 1, xp.nan, xp.inf, -xp.inf]) + a.flags["W"] = False + with pytest.raises(ValueError, match="read-only"): + xp.nan_to_num(a, copy=False) @pytest.mark.parametrize("copy", [True, False]) @pytest.mark.parametrize("dt", get_all_dtypes(no_bool=True, no_none=True)) @@ -1522,9 +1520,9 @@ def test_strided(self, copy, dt): if dt.kind in "fc": a[::4] = numpy.nan ia[::4] = dpnp.nan + result = dpnp.nan_to_num(ia[::-2], copy=copy, nan=57.0) expected = numpy.nan_to_num(a[::-2], copy=copy, nan=57.0) - assert_dtype_allclose(result, expected) diff --git a/dpnp/tests/third_party/cupy/math_tests/test_misc.py b/dpnp/tests/third_party/cupy/math_tests/test_misc.py index c04a4cbc306d..e2f12ae373a6 100644 --- 
a/dpnp/tests/third_party/cupy/math_tests/test_misc.py +++ b/dpnp/tests/third_party/cupy/math_tests/test_misc.py @@ -1,8 +1,10 @@ +from __future__ import annotations + import numpy import pytest import dpnp as cupy -from dpnp.tests.helper import has_support_aspect64 +from dpnp.tests.helper import has_support_aspect64, numpy_version from dpnp.tests.third_party.cupy import testing @@ -155,10 +157,7 @@ def test_external_clip4(self, dtype): # (min or max) as a keyword argument according to Python Array API. # In older versions of numpy, both arguments must be positional; # passing only one raises a TypeError. - if ( - xp is numpy - and numpy.lib.NumpyVersion(numpy.__version__) < "2.1.0" - ): + if xp is numpy and numpy_version() < "2.1.0": with pytest.raises(TypeError): xp.clip(a, 3) else: @@ -257,9 +256,10 @@ def test_nan_to_num_inf(self): def test_nan_to_num_nan(self): self.check_unary_nan("nan_to_num") - @testing.numpy_cupy_allclose(atol=1e-5, type_check=has_support_aspect64()) + @pytest.mark.skip("no scalar support") + @testing.numpy_cupy_allclose(atol=1e-5) def test_nan_to_num_scalar_nan(self, xp): - return xp.nan_to_num(xp.array(xp.nan)) + return xp.nan_to_num(xp.nan) @pytest.mark.filterwarnings("ignore::RuntimeWarning") def test_nan_to_num_inf_nan(self): @@ -286,14 +286,44 @@ def test_nan_to_num_inplace(self, xp): return y @pytest.mark.parametrize("kwarg", ["nan", "posinf", "neginf"]) - def test_nan_to_num_broadcast(self, kwarg): + @testing.numpy_cupy_array_equal() + def test_nan_to_num_broadcast_same_shapes(self, xp, kwarg): + x = xp.asarray( + [[0, 1, xp.nan, 4], [11, xp.inf, 12, 13]], + dtype=cupy.default_float_type(), + ) + y = xp.zeros((2, 4), dtype=x.dtype) + return xp.nan_to_num(x, **{kwarg: y}) + + @pytest.mark.parametrize("kwarg", ["nan", "posinf", "neginf"]) + @testing.numpy_cupy_array_equal() + def test_nan_to_num_broadcast_different_columns(self, xp, kwarg): + x = xp.asarray( + [[0, 1, xp.nan, 4], [11, xp.inf, 12, 13]], + 
dtype=cupy.default_float_type(), + ) + y = xp.zeros((2, 1), dtype=x.dtype) + return xp.nan_to_num(x, **{kwarg: y}) + + @pytest.mark.parametrize("kwarg", ["nan", "posinf", "neginf"]) + @testing.numpy_cupy_array_equal() + def test_nan_to_num_broadcast_different_rows(self, xp, kwarg): + x = xp.asarray( + [[0, 1, xp.nan, 4], [11, -xp.inf, 12, 13]], + dtype=cupy.default_float_type(), + ) + y = xp.zeros((1, 4), dtype=x.dtype) + return xp.nan_to_num(x, **{kwarg: y}) + + @pytest.mark.parametrize("kwarg", ["nan", "posinf", "neginf"]) + def test_nan_to_num_broadcast_invalid_shapes(self, kwarg): for xp in (numpy, cupy): x = xp.asarray([0, 1, xp.nan, 4], dtype=cupy.default_float_type()) - y = xp.zeros((2, 4), dtype=cupy.default_float_type()) - with pytest.raises((ValueError, TypeError)): + y = xp.zeros((2, 4), dtype=x.dtype) + with pytest.raises(ValueError): xp.nan_to_num(x, **{kwarg: y}) - with pytest.raises((ValueError, TypeError)): - xp.nan_to_num(0.0, **{kwarg: y}) + with pytest.raises(ValueError): + xp.nan_to_num(xp.array(0.0), **{kwarg: y}) @testing.for_all_dtypes(no_bool=True, no_complex=True) @testing.numpy_cupy_array_equal() From 70832ef9c64fa83b7407202ec8d83619e91e0241 Mon Sep 17 00:00:00 2001 From: Anton <100830759+antonwolfy@users.noreply.github.com> Date: Tue, 10 Feb 2026 11:02:36 +0100 Subject: [PATCH 10/18] Bump `anaconda-client` version to `1.14.1` (#2763) This PR bumps `anaconda-client` version from 1.13.1 to 1.14.1. 
Otherwise the upload step in `Conda package` workflow is failing with: ```bash Traceback (most recent call last): File "/home/runner/miniconda3/envs/upload/bin/anaconda", line 6, in from anaconda_cli_base.cli import app File "/home/runner/miniconda3/envs/upload/lib/python3.13/site-packages/anaconda_cli_base/cli.py", line 229, in load_registered_subcommands(app) ~~~~~~~~~~~~~~~~~~~~~~~~~~~^^^^^ File "/home/runner/miniconda3/envs/upload/lib/python3.13/site-packages/anaconda_cli_base/plugins.py", line 282, in load_registered_subcommands subcommand_entry_points = _load_entry_points_for_group(PLUGIN_GROUP_NAME) File "/home/runner/miniconda3/envs/upload/lib/python3.13/site-packages/anaconda_cli_base/plugins.py", line 57, in _load_entry_points_for_group module: typer.Typer = entry_point.load() ~~~~~~~~~~~~~~~~^^ File "/home/runner/miniconda3/envs/upload/lib/python3.13/importlib/metadata/__init__.py", line 179, in load module = import_module(match.group('module')) File "/home/runner/miniconda3/envs/upload/lib/python3.13/importlib/__init__.py", line 88, in import_module return _bootstrap._gcd_import(name[level:], package, level) ~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "/home/runner/miniconda3/envs/upload/lib/python3.13/site-packages/binstar_client/__init__.py", line 16, in from pkg_resources import parse_version as pv ModuleNotFoundError: No module named 'pkg_resources' ``` Due to the latest version `82.0.0` of `setuptools` package is using now in the env. 
--- environments/upload_cleanup_conda_pkg.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/environments/upload_cleanup_conda_pkg.yml b/environments/upload_cleanup_conda_pkg.yml index b0f96cadc61a..0acf9fcd67ce 100644 --- a/environments/upload_cleanup_conda_pkg.yml +++ b/environments/upload_cleanup_conda_pkg.yml @@ -3,4 +3,4 @@ channels: - conda-forge dependencies: - python=3.13 - - anaconda-client=1.13.1 + - anaconda-client=1.14.1 From af6205f83be315531def9ea7c81ff0d1f3a1e0fe Mon Sep 17 00:00:00 2001 From: vchamarthi <123653452+vchamarthi@users.noreply.github.com> Date: Wed, 11 Feb 2026 12:47:31 -0600 Subject: [PATCH 11/18] Add infra pytest warnings plugin (#2757) This PR introduce the plugin feature to capture and format pytest warnings to process it in internal CI. These changes will not add any overhead to existing pytest scope. This feature can be fully enabled/disabled by env var - `DPNP_INFRA_WARNINGS_ENABLE` --- dpnp/tests/config.py | 12 ++ dpnp/tests/conftest.py | 3 + dpnp/tests/infra_warning_utils.py | 237 ++++++++++++++++++++++++++++++ 3 files changed, 252 insertions(+) create mode 100644 dpnp/tests/infra_warning_utils.py diff --git a/dpnp/tests/config.py b/dpnp/tests/config.py index 8f7555a4ef62..a49fd8cad250 100644 --- a/dpnp/tests/config.py +++ b/dpnp/tests/config.py @@ -4,3 +4,15 @@ float16_types = bool(os.getenv("DPNP_TEST_FLOAT_16", 0)) complex_types = bool(os.getenv("DPNP_TEST_COMPLEX_TYPES", 0)) bool_types = bool(os.getenv("DPNP_TEST_BOOL_TYPES", 0)) + + +infra_warnings_enable = bool(os.getenv("DPNP_INFRA_WARNINGS_ENABLE", 0)) +infra_warnings_directory = os.getenv("DPNP_INFRA_WARNINGS_DIRECTORY", None) +infra_warnings_events_artifact = os.getenv( + "DPNP_INFRA_WARNINGS_EVENTS_ARTIFACT", + "dpnp_infra_warnings_events.jsonl", +) +infra_warnings_summary_artifact = os.getenv( + "DPNP_INFRA_WARNINGS_SUMMARY_ARTIFACT", + "dpnp_infra_warnings_summary.json", +) diff --git a/dpnp/tests/conftest.py b/dpnp/tests/conftest.py index 
bd6c71f9a92b..5d766566bca5 100644 --- a/dpnp/tests/conftest.py +++ b/dpnp/tests/conftest.py @@ -44,6 +44,7 @@ import dpnp from .helper import get_dev_id +from .infra_warning_utils import register_infra_warnings_plugin_if_enabled skip_mark = pytest.mark.skip(reason="Skipping test.") @@ -114,6 +115,8 @@ def pytest_configure(config): "ignore:invalid value encountered in arccosh:RuntimeWarning", ) + register_infra_warnings_plugin_if_enabled(config) + def pytest_collection_modifyitems(config, items): test_path = os.path.split(__file__)[0] diff --git a/dpnp/tests/infra_warning_utils.py b/dpnp/tests/infra_warning_utils.py new file mode 100644 index 000000000000..94a5601a2baf --- /dev/null +++ b/dpnp/tests/infra_warning_utils.py @@ -0,0 +1,237 @@ +import json +import os +import sys +from collections import Counter +from pathlib import Path + +import dpctl +import numpy + +import dpnp + +from . import config as warn_config + + +def _origin_from_filename(filename: str) -> str: + file = (filename or "").replace("\\", "/") + if "/dpnp/" in file or file.startswith("dpnp/"): + return "dpnp" + if "/numpy/" in file or file.startswith("numpy/"): + return "numpy" + if "/dpctl/" in file or file.startswith("dpctl/"): + return "dpctl" + return "third_party" + + +def _json_dumps_one_line(obj) -> str: + return json.dumps(obj, separators=(",", ":")) + + +class DpnpInfraWarningsPlugin: + """Pytest custom plugin that records pytest-captured warnings. + + It only records what pytest already captures (via pytest_warning_recorded). + Does not change warnings filters. 
+ + Env vars: + - DPNP_INFRA_WARNINGS_ENABLE=1 (enables the plugin) + - DPNP_INFRA_WARNINGS_DIRECTORY= (writes artifacts) + - DPNP_INFRA_WARNINGS_EVENTS_ARTIFACT (optional filename) + - DPNP_INFRA_WARNINGS_SUMMARY_ARTIFACT (optional filename) + """ + + SUMMARY_BEGIN = "DPNP_WARNINGS_SUMMARY_BEGIN" + SUMMARY_END = "DPNP_WARNINGS_SUMMARY_END" + EVENT_PREFIX = "DPNP_WARNING_EVENT - " + + def __init__(self): + self.enabled = warn_config.infra_warnings_enable + self.directory = warn_config.infra_warnings_directory + self.events_artifact = warn_config.infra_warnings_events_artifact + self.summary_artifact = warn_config.infra_warnings_summary_artifact + + self._counts = Counter() + self._warnings = {} + self._totals = Counter() + self._env = {} + + self._events_fp = None + self._events_file = None + self._summary_file = None + + def _log_stdout(self, message: str) -> None: + try: + sys.stderr.write(message.rstrip("\n") + "\n") + sys.stderr.flush() + except Exception: + pass + + def pytest_configure(self): + if not self.enabled: + return + + self._env.update( + { + "numpy_version": getattr(numpy, "__version__", "unknown"), + "numpy_path": getattr(numpy, "__file__", "unknown"), + "dpnp_version": getattr(dpnp, "__version__", "unknown"), + "dpnp_path": getattr(dpnp, "__file__", "unknown"), + "dpctl_version": getattr(dpctl, "__version__", "unknown"), + "dpctl_path": getattr(dpctl, "__file__", "unknown"), + "job": os.getenv("JOB_NAME", "unknown"), + "build_number": os.getenv("BUILD_NUMBER", "unknown"), + "git_sha": os.getenv("GIT_COMMIT", "unknown"), + } + ) + + if self.directory: + try: + p = Path(self.directory).expanduser().resolve() + if p.exists() and not p.is_dir(): + raise ValueError(f"{p} exists and is not a directory") + + p.mkdir(parents=True, exist_ok=True) + + if ( + not self.events_artifact + or Path(self.events_artifact).name != self.events_artifact + ): + raise ValueError( + f"Invalid events artifact filename: {self.events_artifact}" + ) + + if ( + not 
self.summary_artifact + or Path(self.summary_artifact).name != self.summary_artifact + ): + raise ValueError( + f"Invalid summary artifact filename: {self.summary_artifact}" + ) + + self._events_file = p / self.events_artifact + self._events_fp = self._events_file.open( + mode="w", encoding="utf-8", buffering=1, newline="\n" + ) + self._summary_file = p / self.summary_artifact + except Exception as exc: + self._close_events_fp() + self._log_stdout( + "DPNP infra warnings plugin: artifacts disabled " + f"(failed to initialize directory/files): {exc}" + ) + + def pytest_warning_recorded(self, warning_message, when, nodeid, location): + if not self.enabled: + return + + category = getattr( + getattr(warning_message, "category", None), + "__name__", + str(getattr(warning_message, "category", "Warning")), + ) + message = str(getattr(warning_message, "message", warning_message)) + + filename = getattr(warning_message, "filename", None) or ( + location[0] if location and len(location) > 0 else None + ) + lineno = getattr(warning_message, "lineno", None) or ( + location[1] if location and len(location) > 1 else None + ) + func = location[2] if location and len(location) > 2 else None + + origin = _origin_from_filename(filename or "") + key = f"{category}||{origin}||{message}" + self._counts[key] += 1 + self._totals[f"category::{category}"] += 1 + self._totals[f"origin::{origin}"] += 1 + self._totals[f"phase::{when}"] += 1 + + if key not in self._warnings: + self._warnings[key] = { + "category": category, + "origin": origin, + "when": when, + "nodeid": nodeid, + "filename": filename, + "lineno": lineno, + "function": func, + "message": message, + } + + event = { + "when": when, + "nodeid": nodeid, + "category": category, + "origin": origin, + "message": message, + "filename": filename, + "lineno": lineno, + "function": func, + } + + if self._events_fp is not None: + try: + self._events_fp.write(_json_dumps_one_line(event) + "\n") + except Exception: + pass + + 
self._log_stdout(f"{self.EVENT_PREFIX} {_json_dumps_one_line(event)}") + + def pytest_terminal_summary(self, terminalreporter, exitstatus): + if not self.enabled: + return + + summary = { + "schema_version": "1.0", + "exit_status": exitstatus, + "environment": dict(self._env), + "total_warning_events": int(sum(self._counts.values())), + "unique_warning_types": int(len(self._counts)), + "totals": dict(self._totals), + "top_unique_warnings": [ + dict(self._warnings[k], count=c) + for k, c in self._counts.most_common(50) + if k in self._warnings + ], + } + + if self._summary_file: + try: + with open(self._summary_file, "w", encoding="utf-8") as f: + json.dump(summary, f, indent=2, sort_keys=True) + terminalreporter.write_line( + f"DPNP infrastructure warnings summary written to: {self._summary_file}" + ) + except Exception as exc: + terminalreporter.write_line( + f"Failed to write DPNP infrastructure warnings summary to: {self._summary_file}. Error: {exc}" + ) + + self._close_events_fp() + terminalreporter.write_line(self.SUMMARY_BEGIN) + terminalreporter.write_line(_json_dumps_one_line(summary)) + terminalreporter.write_line(self.SUMMARY_END) + + def pytest_unconfigure(self): + self._close_events_fp() + + def _close_events_fp(self): + if self._events_fp is None: + return + try: + self._events_fp.close() + finally: + self._events_fp = None + + +def register_infra_warnings_plugin_if_enabled(config) -> None: + """Register infra warnings plugin if enabled via env var.""" + + if not warn_config.infra_warnings_enable: + return + + plugin_name = "dpnp-infra-warnings" + if config.pluginmanager.get_plugin(plugin_name) is not None: + return + + config.pluginmanager.register(DpnpInfraWarningsPlugin(), plugin_name) From 4bc43a95a8afedacd541171bdafda466ebf2e7ca Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sat, 14 Feb 2026 15:57:39 +0100 Subject: [PATCH 12/18] Bump github/codeql-action from 4.32.2 to 4.32.3 (#2769) 
Bumps [github/codeql-action](https://github.com/github/codeql-action) from 4.32.2 to 4.32.3. --- .github/workflows/openssf-scorecard.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/openssf-scorecard.yml b/.github/workflows/openssf-scorecard.yml index 441b8806eef9..8bf5e86d03ed 100644 --- a/.github/workflows/openssf-scorecard.yml +++ b/.github/workflows/openssf-scorecard.yml @@ -72,6 +72,6 @@ jobs: # Upload the results to GitHub's code scanning dashboard. - name: "Upload to code-scanning" - uses: github/codeql-action/upload-sarif@45cbd0c69e560cd9e7cd7f8c32362050c9b7ded2 # v4.32.2 + uses: github/codeql-action/upload-sarif@9e907b5e64f6b83e7804b09294d44122997950d6 # v4.32.3 with: sarif_file: results.sarif From 35d88b81648107f0ce1d3c921c6ac5546c0a43be Mon Sep 17 00:00:00 2001 From: Anton <100830759+antonwolfy@users.noreply.github.com> Date: Mon, 16 Feb 2026 12:32:08 +0100 Subject: [PATCH 13/18] `TestTrimZeros::test_multiple_axes` to pass on a device without fp64 (#2767) The PR fixes the issue with failing `TestTrimZeros::test_multiple_axes` test when running on a device without fp64 support. 
--- dpnp/tests/test_manipulation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dpnp/tests/test_manipulation.py b/dpnp/tests/test_manipulation.py index 82e4640830a8..8ddba08dbb92 100644 --- a/dpnp/tests/test_manipulation.py +++ b/dpnp/tests/test_manipulation.py @@ -1541,7 +1541,7 @@ def test_multiple_axes(self, shape, axis, trim): result = dpnp.trim_zeros(ia, axis=axis, trim=trim) expected = numpy.trim_zeros(a, axis=axis, trim=trim) - assert_array_equal(result, expected) + assert_dtype_allclose(result, expected) # NOTE: numpy behaves differently on 0-sized input array # and returns the input array with reduced shapes From 9d6d5a50e6f1ca82e720f17f89e455f40692f04b Mon Sep 17 00:00:00 2001 From: Anton <100830759+antonwolfy@users.noreply.github.com> Date: Mon, 16 Feb 2026 17:23:23 +0100 Subject: [PATCH 14/18] Bump max versions in run dependencies on DPC++ compiler and OneMKL (#2765) The PR bumps run dependencies on DPC++ compiler and OneMKL packages in scope of 2026.0 release enabling. Also it includes the workaround for the CMake compiler issue on Windows, which has to be removed once the compiler resolve that. --- CMakeLists.txt | 11 +++++++- conda-recipe/bld.bat | 2 +- conda-recipe/meta.yaml | 2 +- .../cmake/Modules/IntelSYCLConfig.cmake | 2 +- .../cupy/linalg_tests/test_decomposition.py | 27 ++++++++++++++++--- 5 files changed, 36 insertions(+), 8 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 9d676232f08e..6ad364b2a272 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -60,7 +60,16 @@ set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH) # 8. Paths stored in the CMake System Package Registry # 9. 
Paths specified by the PATHS option (assumed hard-coded guesses) set(path_to_cmake_dir ${CMAKE_SOURCE_DIR}/dpnp/backend/cmake/Modules) -find_package(IntelSYCL REQUIRED PATHS ${path_to_cmake_dir}) +# TODO: use the commented logic once the compiler resolves CMake issue CMPLRLLVM-73484 +# find_package(IntelSYCL REQUIRED PATHS ${path_to_cmake_dir}) +find_package(IntelSYCL QUIET) +if(SYCL_LIBRARY_FOUND) + find_package(IntelSYCL REQUIRED) +else() + # compiler CMake might have an issue and can't find SYCL_LIBRARY properly + # then use vendored CMake with fixed logic + find_package(IntelSYCL REQUIRED PATHS ${path_to_cmake_dir} NO_DEFAULT_PATH) +endif() find_package(TBB REQUIRED PATHS ${path_to_cmake_dir}) set(MKL_ARCH "intel64") diff --git a/conda-recipe/bld.bat b/conda-recipe/bld.bat index 602faf143bfa..2c79ec808814 100644 --- a/conda-recipe/bld.bat +++ b/conda-recipe/bld.bat @@ -13,7 +13,7 @@ if DEFINED OVERRIDE_INTEL_IPO ( set "CMAKE_ARGS=%CMAKE_ARGS% -DCMAKE_INTERPROCEDURAL_OPTIMIZATION:BOOL=FALSE" ) -FOR %%V IN (17.0.0 17 18.0.0 18 19.0.0 19 20.0.0 20 21.0.0 21) DO @( +FOR %%V IN (17.0.0 17 18.0.0 18 19.0.0 19 20.0.0 20 21.0.0 21 22.0.0 22) DO @( REM set DIR_HINT if directory exists IF EXIST "%BUILD_PREFIX%\Library\lib\clang\%%V\" ( set "SYCL_INCLUDE_DIR_HINT=%BUILD_PREFIX%\Library\lib\clang\%%V" diff --git a/conda-recipe/meta.yaml b/conda-recipe/meta.yaml index 661f44b50ed9..5ee519fc921a 100644 --- a/conda-recipe/meta.yaml +++ b/conda-recipe/meta.yaml @@ -1,4 +1,4 @@ -{% set max_compiler_and_mkl_version = environ.get("MAX_BUILD_CMPL_MKL_VERSION", "2026.0a0") %} +{% set max_compiler_and_mkl_version = environ.get("MAX_BUILD_CMPL_MKL_VERSION", "2027.0a0") %} {% set required_compiler_and_mkl_version = "2025.0" %} {% set required_dpctl_version = "0.22.0*" %} diff --git a/dpnp/backend/cmake/Modules/IntelSYCLConfig.cmake b/dpnp/backend/cmake/Modules/IntelSYCLConfig.cmake index 31ad2ef60272..7ba269c70c08 100644 --- a/dpnp/backend/cmake/Modules/IntelSYCLConfig.cmake +++ 
b/dpnp/backend/cmake/Modules/IntelSYCLConfig.cmake @@ -329,7 +329,7 @@ if(SYCL_COMPILER) ) #TODO Make an input file to configure and update the lib current version if(WIN32) - set(sycl_lib_suffix "8") + set(sycl_lib_suffix "9") else() set(sycl_lib_suffix "") endif() diff --git a/dpnp/tests/third_party/cupy/linalg_tests/test_decomposition.py b/dpnp/tests/third_party/cupy/linalg_tests/test_decomposition.py index 9948f4d0a920..c7ff275cac0c 100644 --- a/dpnp/tests/third_party/cupy/linalg_tests/test_decomposition.py +++ b/dpnp/tests/third_party/cupy/linalg_tests/test_decomposition.py @@ -1,16 +1,26 @@ +from __future__ import annotations + import unittest import numpy import pytest import dpnp as cupy + +# from cupyx import cusolver +# from cupy.cuda import driver +# from cupy.cuda import runtime +# from cupy.linalg import _util from dpnp.tests.helper import ( + LTS_VERSION, has_support_aspect64, - is_cpu_device, + is_lts_driver, ) from dpnp.tests.third_party.cupy import testing from dpnp.tests.third_party.cupy.testing import _condition +# import cupyx + def random_matrix(shape, dtype, scale, sym=False): m, n = shape[-2:] @@ -95,6 +105,8 @@ def test_decomposition(self, dtype): ] ) def test_batched_decomposition(self, dtype): + # if not cusolver.check_availability("potrfBatched"): + # pytest.skip("potrfBatched is not available") Ab1 = random_matrix((3, 5, 5), dtype, scale=(10, 10000), sym=True) self.check_L(Ab1) Ab2 = random_matrix((2, 2, 5, 5), dtype, scale=(10, 10000), sym=True) @@ -134,9 +146,6 @@ def check_L(self, array): with pytest.raises(xp.linalg.LinAlgError): xp.linalg.cholesky(a) - # TODO: remove skipif when MKLD-17318 is resolved - # _potrf does not raise an error with singular matrices on CPU. 
- @pytest.mark.skipif(is_cpu_device(), reason="MKLD-17318") @testing.for_dtypes( [ numpy.int32, @@ -163,6 +172,10 @@ class TestQRDecomposition(unittest.TestCase): @testing.for_dtypes("fdFD") def check_mode(self, array, mode, dtype): + # if runtime.is_hip and driver.get_build_version() < 307: + # if dtype in (numpy.complex64, numpy.complex128): + # pytest.skip("ungqr unsupported") + a_cpu = numpy.asarray(array, dtype=dtype) a_gpu = cupy.asarray(array, dtype=dtype) result_gpu = cupy.linalg.qr(a_gpu, mode=mode) @@ -189,6 +202,9 @@ def test_mode(self): self.check_mode(numpy.random.randn(3, 3), mode=self.mode) self.check_mode(numpy.random.randn(5, 4), mode=self.mode) + @pytest.mark.skipif( + is_lts_driver(version=LTS_VERSION.V1_6), reason="SAT-8375" + ) @testing.with_requires("numpy>=1.22") @testing.fix_random() def test_mode_rank3(self): @@ -196,6 +212,9 @@ def test_mode_rank3(self): self.check_mode(numpy.random.randn(4, 3, 3), mode=self.mode) self.check_mode(numpy.random.randn(2, 5, 4), mode=self.mode) + @pytest.mark.skipif( + is_lts_driver(version=LTS_VERSION.V1_6), reason="SAT-8375" + ) @testing.with_requires("numpy>=1.22") @testing.fix_random() def test_mode_rank4(self): From 9c4aed22eb8956b24aa646098b50244c5a37ae40 Mon Sep 17 00:00:00 2001 From: Anton <100830759+antonwolfy@users.noreply.github.com> Date: Mon, 16 Feb 2026 20:16:35 +0100 Subject: [PATCH 15/18] Improve `dpnp.partition` implementation (#2766) The PR propose to improve implementation and to use `dpnp.sort` call when - input array has number of dimensions > 1 - input array has previously not supported integer dtype - `axis` keyword is passed (previously not supported) - sequence of `kth` is passed (previously not supported) In case of `ndim > 1` previously the implementation from legacy backend was used, which is significantly slow (see performance comparation below). It used a copy of input data into the shared USM memory and included computations on the host. 
This PR proposes to reuse `dpnp.sort` for all the above cases. While in case when the legacy implementation is stable and fast (for 1D input array), it will remain, because it relies on `std::nth_element` from OneDPL. The benchmark results were collected on PVC with help of the below code: ```python import dpnp, numpy as np from dpnp.tests.helper import generate_random_numpy_array a = generate_random_numpy_array(10**7, dtype=np.float64, seed_value=117) ia = dpnp.array(a) %timeit x = dpnp.partition(ia, 513); x.sycl_queue.wait() ``` Below tables contain data in case of 1D input array (shape=(10**7,)), where the implementation path was kept the same, plus adding support of missing integer dtypes using fallback on the sort function: | Implementation | int32 | uint32 | int64 | uint64 | float32 | float64 | complex64 | complex128 | |--------|--------|--------|--------|--------|--------|--------|--------|--------| | old (legacy backend) | 7.46 ms | not supported | 9.46 ms | not supported | 7.39 ms | 8.92 ms | 10.9 ms | 21.2 ms | | new (backend + sort) | 7.34 ms | 10.8 ms | 9.48 ms | 12.5 ms | 7.37 ms | 8.89 ms | 11 ms | 21.2 ms | The following code was used for 2D input array with shape=(10**4, 10**4): ```python import dpnp, numpy as np from dpnp.tests.helper import generate_random_numpy_array a = generate_random_numpy_array((10**4, 10**4), dtype=np.float64, seed_value=117) ia = dpnp.array(a) %timeit x = dpnp.partition(ia, 1513); x.sycl_queue.wait() ``` In that case the new implementation is fully based on the sort call: | Implementation | int32 | int64 | float32 | float64 | complex64 | complex128 | |--------|--------|--------|--------|--------|--------|--------| | old (legacy backend) | 6.4 s | 6.89 s | 7.36 s | 7.66 s | 8.61 s | 10 s | | new (sort) | 57.4 ms | 64.7 ms | 62.2 ms | 68 ms | 77 ms | 151 ms | --- CHANGELOG.md | 1 + dpnp/backend/kernels/dpnp_krnl_sorting.cpp | 89 +----- dpnp/dpnp_array.py | 41 ++- dpnp/dpnp_iface_sorting.py | 142 ++++++++--
dpnp/tests/skipped_tests_cuda.tbl | 141 ---------- dpnp/tests/test_sort.py | 264 +++++++++++++++--- .../cupy/sorting_tests/test_sort.py | 15 +- 7 files changed, 393 insertions(+), 300 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b2e11822431a..e98a6763c4a4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -49,6 +49,7 @@ Also, that release drops support for Python 3.9, making Python 3.10 the minimum * Aligned `dpnp.trim_zeros` with NumPy 2.4 to support a tuple of integers passed with `axis` keyword [#2746](https://github.com/IntelPython/dpnp/pull/2746) * Aligned `strides` property of `dpnp.ndarray` with NumPy and CuPy implementations [#2747](https://github.com/IntelPython/dpnp/pull/2747) * Extended `dpnp.nan_to_num` to support broadcasting of `nan`, `posinf`, and `neginf` keywords [#2754](https://github.com/IntelPython/dpnp/pull/2754) +* Changed `dpnp.partition` implementation to reuse `dpnp.sort` where it brings the performance benefit [#2766](https://github.com/IntelPython/dpnp/pull/2766) ### Deprecated diff --git a/dpnp/backend/kernels/dpnp_krnl_sorting.cpp b/dpnp/backend/kernels/dpnp_krnl_sorting.cpp index 8a7ee9c8418f..a1495dfb0279 100644 --- a/dpnp/backend/kernels/dpnp_krnl_sorting.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_sorting.cpp @@ -70,90 +70,27 @@ DPCTLSyclEventRef dpnp_partition_c(DPCTLSyclQueueRef q_ref, sycl::queue q = *(reinterpret_cast(q_ref)); - if (ndim == 1) // 1d array with C-contiguous data - { - _DataType *arr = static_cast<_DataType *>(array1_in); - _DataType *result = static_cast<_DataType *>(result1); + _DataType *arr = static_cast<_DataType *>(array1_in); + _DataType *result = static_cast<_DataType *>(result1); - auto policy = oneapi::dpl::execution::make_device_policy< - dpnp_partition_c_kernel<_DataType>>(q); + auto policy = oneapi::dpl::execution::make_device_policy< + dpnp_partition_c_kernel<_DataType>>(q); - // fill the result array with data from input one - q.memcpy(result, arr, size * sizeof(_DataType)).wait(); + // 
fill the result array with data from input one + q.memcpy(result, arr, size * sizeof(_DataType)).wait(); - // make a partial sorting such that: + // note, a loop for a multidimensional input array (size_ > 1) is + // experimental and isn't tested properly for now + for (size_t i = 0; i < size_; i++) { + _DataType *bufptr = result + i * shape_[0]; + + // for every slice it makes a partial sorting such that: // 1. result[0 <= i < kth] <= result[kth] // 2. result[kth <= i < size] >= result[kth] // event-blocking call, no need for wait() - std::nth_element(policy, result, result + kth, result + size, + std::nth_element(policy, bufptr, bufptr + kth, bufptr + size, dpnp_less_comp()); - return event_ref; - } - - DPNPC_ptr_adapter<_DataType> input1_ptr(q_ref, array1_in, size, true); - DPNPC_ptr_adapter<_DataType> input2_ptr(q_ref, array2_in, size, true); - DPNPC_ptr_adapter<_DataType> result1_ptr(q_ref, result1, size, true, true); - _DataType *arr = input1_ptr.get_ptr(); - _DataType *arr2 = input2_ptr.get_ptr(); - _DataType *result = result1_ptr.get_ptr(); - - auto arr_to_result_event = q.memcpy(result, arr, size * sizeof(_DataType)); - arr_to_result_event.wait(); - - _DataType *matrix = new _DataType[shape_[ndim - 1]]; - - for (size_t i = 0; i < size_; ++i) { - size_t ind_begin = i * shape_[ndim - 1]; - size_t ind_end = (i + 1) * shape_[ndim - 1] - 1; - - for (size_t j = ind_begin; j < ind_end + 1; ++j) { - size_t ind = j - ind_begin; - matrix[ind] = arr2[j]; - } - std::partial_sort(matrix, matrix + shape_[ndim - 1], - matrix + shape_[ndim - 1], dpnp_less_comp()); - for (size_t j = ind_begin; j < ind_end + 1; ++j) { - size_t ind = j - ind_begin; - arr2[j] = matrix[ind]; - } } - - shape_elem_type *shape = reinterpret_cast( - sycl::malloc_shared(ndim * sizeof(shape_elem_type), q)); - auto memcpy_event = q.memcpy(shape, shape_, ndim * sizeof(shape_elem_type)); - - memcpy_event.wait(); - - sycl::range<2> gws(size_, kth + 1); - auto kernel_parallel_for_func = 
[=](sycl::id<2> global_id) { - size_t j = global_id[0]; - size_t k = global_id[1]; - - _DataType val = arr2[j * shape[ndim - 1] + k]; - - for (size_t i = 0; i < static_cast(shape[ndim - 1]); ++i) { - if (result[j * shape[ndim - 1] + i] == val) { - _DataType change_val1 = result[j * shape[ndim - 1] + i]; - _DataType change_val2 = result[j * shape[ndim - 1] + k]; - result[j * shape[ndim - 1] + k] = change_val1; - result[j * shape[ndim - 1] + i] = change_val2; - } - } - }; - - auto kernel_func = [&](sycl::handler &cgh) { - cgh.depends_on({memcpy_event}); - cgh.parallel_for>( - gws, kernel_parallel_for_func); - }; - - auto event = q.submit(kernel_func); - - event.wait(); - - delete[] matrix; - sycl::free(shape, q); - return event_ref; } diff --git a/dpnp/dpnp_array.py b/dpnp/dpnp_array.py index f37a3a2b3be3..bb864d4444a9 100644 --- a/dpnp/dpnp_array.py +++ b/dpnp/dpnp_array.py @@ -1459,21 +1459,34 @@ def nonzero(self): def partition(self, /, kth, axis=-1, kind="introselect", order=None): """ - Return a partitioned copy of an array. + Partially sorts the elements in the array in such a way that the value + of the element in k-th position is in the position it would be in a + sorted array. In the output array, all elements smaller than the k-th + element are located to the left of this element and all equal or + greater are located to its right. The ordering of the elements in the + two partitions on the either side of the k-th element in the output + array is undefined. - Rearranges the elements in the array in such a way that the value of - the element in `kth` position is in the position it would be in - a sorted array. + Refer to `dpnp.partition` for full documentation. - All elements smaller than the `kth` element are moved before this - element and all equal or greater are moved behind it. The ordering - of the elements in the two partitions is undefined. + kth : {int, sequence of ints} + Element index to partition by. 
The kth element value will be in its + final sorted position and all smaller elements will be moved before + it and all equal or greater elements behind it. + The order of all elements in the partitions is undefined. If + provided with a sequence of kth it will partition all elements + indexed by kth of them into their sorted position at once. + axis : int, optional + Axis along which to sort. The default is ``-1``, which means sort + along the last axis. - Refer to `dpnp.partition` for full documentation. + Default: ``-1``. See Also -------- :obj:`dpnp.partition` : Return a partitioned copy of an array. + :obj:`dpnp.argpartition` : Indirect partition. + :obj:`dpnp.sort` : Full sort. Examples -------- @@ -1481,13 +1494,19 @@ >>> a = np.array([3, 4, 2, 1]) >>> a.partition(3) >>> a + array([1, 2, 3, 4]) # may vary + + >>> a.partition((1, 3)) + >>> a array([1, 2, 3, 4]) """ - self._array_obj = dpnp.partition( - self, kth, axis=axis, kind=kind, order=order - ).get_array() + if axis is None: + raise TypeError( + "'NoneType' object cannot be interpreted as an integer" + ) + self[...] = dpnp.partition(self, kth, axis=axis, kind=kind, order=order) def prod( self, diff --git a/dpnp/dpnp_iface_sorting.py b/dpnp/dpnp_iface_sorting.py index db33a88c7488..9c5097a5f3e3 100644 --- a/dpnp/dpnp_iface_sorting.py +++ b/dpnp/dpnp_iface_sorting.py @@ -39,8 +39,9 @@ """ +from collections.abc import Sequence + import dpctl.tensor as dpt -import numpy from dpctl.tensor._numpy_helper import normalize_axis_index import dpnp @@ -51,7 +52,6 @@ ) from .dpnp_array import dpnp_array from .dpnp_utils import ( - call_origin, map_dtype_to_device, ) @@ -147,7 +147,7 @@ def argsort( Limitations ----------- - Parameters `order` is only supported with its default value. + Parameter `order` is only supported with its default value.
Sorting algorithms ``"quicksort"`` and ``"heapsort"`` are not supported. @@ -201,44 +201,128 @@ def argsort( ) -def partition(x1, kth, axis=-1, kind="introselect", order=None): +def partition(a, kth, axis=-1, kind="introselect", order=None): """ Return a partitioned copy of an array. For full documentation refer to :obj:`numpy.partition`. + Parameters + ---------- + a : {dpnp.ndarray, usm_ndarray} + Array to be sorted. + kth : {int, sequence of ints} + Element index to partition by. The k-th value of the element will be in + its final sorted position and all smaller elements will be moved before + it and all equal or greater elements behind it. The order of all + elements in the partitions is undefined. If provided with a sequence of + k-th it will partition all elements indexed by k-th of them into their + sorted position at once. + axis : {None, int}, optional + Axis along which to sort. If ``None``, the array is flattened before + sorting. The default is ``-1``, which sorts along the last axis. + + Default: ``-1``. + + Returns + ------- + out : dpnp.ndarray + Array of the same type and shape as `a`. + Limitations ----------- - Input array is supported as :obj:`dpnp.ndarray`. - Input `kth` is supported as :obj:`int`. - Parameters `axis`, `kind` and `order` are supported only with default - values. + Parameters `kind` and `order` are only supported with its default value. + Otherwise ``NotImplementedError`` exception will be raised. + + See Also + -------- + :obj:`dpnp.ndarray.partition` : Equivalent method. + :obj:`dpnp.argpartition` : Indirect partition. + :obj:`dpnp.sort` : Full sorting. + + Examples + -------- + >>> import dpnp as np + >>> a = np.array([7, 1, 7, 7, 1, 5, 7, 2, 3, 2, 6, 2, 3, 0]) + >>> p = np.partition(a, 4) + >>> p + array([0, 1, 1, 2, 2, 2, 3, 3, 5, 7, 7, 7, 7, 6]) # may vary + + ``p[4]`` is 2; all elements in ``p[:4]`` are less than or equal to + ``p[4]``, and all elements in ``p[5:]`` are greater than or equal to + ``p[4]``. 
The partition is:: + + [0, 1, 1, 2], [2], [2, 3, 3, 5, 7, 7, 7, 7, 6] + + The next example shows the use of multiple values passed to `kth`. + + >>> p2 = np.partition(a, (4, 8)) + >>> p2 + array([0, 1, 1, 2, 2, 2, 3, 3, 5, 6, 7, 7, 7, 7]) + + ``p2[4]`` is 2 and ``p2[8]`` is 5. All elements in ``p2[:4]`` are less + than or equal to ``p2[4]``, all elements in ``p2[5:8]`` are greater than or + equal to ``p2[4]`` and less than or equal to ``p2[8]``, and all elements in + ``p2[9:]`` are greater than or equal to ``p2[8]``. The partition is:: + + [0, 1, 1, 2], [2], [2, 3, 3], [5], [6, 7, 7, 7, 7] """ - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) - if x1_desc: - if dpnp.is_cuda_backend(x1_desc.get_array()): # pragma: no cover - raise NotImplementedError( - "Running on CUDA is currently not supported" - ) + dpnp.check_supported_arrays_type(a) - if not isinstance(kth, int): - pass - elif x1_desc.ndim == 0: - pass - elif kth >= x1_desc.shape[x1_desc.ndim - 1] or x1_desc.ndim + kth < 0: - pass - elif axis != -1: - pass - elif kind != "introselect": - pass - elif order is not None: - pass - else: - return dpnp_partition(x1_desc, kth, axis, kind, order).get_pyobj() + if kind != "introselect": + raise NotImplementedError( + "`kind` keyword argument is only supported with its default value." + ) + if order is not None: + raise NotImplementedError( + "`order` keyword argument is only supported with its default value." 
+ ) - return call_origin(numpy.partition, x1, kth, axis, kind, order) + if axis is None: + a = dpnp.ravel(a) + axis = -1 + + nd = a.ndim + axis = normalize_axis_index(axis, nd) + length = a.shape[axis] + + if isinstance(kth, int): + kth = (kth,) + elif not isinstance(kth, Sequence): + raise TypeError( + f"kth must be int or sequence of ints, but got {type(kth)}" + ) + elif not all(isinstance(k, int) for k in kth): + raise TypeError("kth is a sequence, but not all elements are integers") + + nkth = len(kth) + if nkth == 0 or a.size == 0: + return dpnp.copy(a) + + # validate kth + kth = list(kth) + for i in range(nkth): + if kth[i] < 0: + kth[i] += length + + if not 0 <= kth[i] < length: + raise ValueError(f"kth(={kth[i]}) out of bounds {length}") + + dt = a.dtype + if ( + nd > 1 + or nkth > 1 + or dpnp.issubdtype(dt, dpnp.unsignedinteger) + or dt in (dpnp.int8, dpnp.int16) + or dpnp.is_cuda_backend(a.get_array()) + ): + # sort is a faster path in case of ndim > 1 + return dpnp.sort(a, axis=axis) + + desc = dpnp.get_dpnp_descriptor(a, copy_when_nondefault_queue=False) + return dpnp_partition(desc, kth[0], axis, kind, order).get_pyobj() def sort(a, axis=-1, kind=None, order=None, *, descending=False, stable=None): diff --git a/dpnp/tests/skipped_tests_cuda.tbl b/dpnp/tests/skipped_tests_cuda.tbl index f035fba3302f..e8415a1ae410 100644 --- a/dpnp/tests/skipped_tests_cuda.tbl +++ b/dpnp/tests/skipped_tests_cuda.tbl @@ -660,144 +660,3 @@ tests/third_party/cupy/random_tests/test_sample.py::TestRandomIntegers2::test_bo tests/third_party/cupy/random_tests/test_sample.py::TestRandomIntegers2::test_bound_2 tests/third_party/cupy/random_tests/test_sample.py::TestRandomIntegers2::test_goodness_of_fit tests/third_party/cupy/random_tests/test_sample.py::TestRandomIntegers2::test_goodness_of_fit_2 - -# partition -tests/test_sort.py::test_partition[[3, 4, 2, 1]-bool-0] -tests/test_sort.py::test_partition[[3, 4, 2, 1]-bool-1] -tests/test_sort.py::test_partition[[3, 4, 2, 1]-int32-0] 
-tests/test_sort.py::test_partition[[3, 4, 2, 1]-int32-1] -tests/test_sort.py::test_partition[[3, 4, 2, 1]-int64-0] -tests/test_sort.py::test_partition[[3, 4, 2, 1]-int64-1] -tests/test_sort.py::test_partition[[3, 4, 2, 1]-float32-0] -tests/test_sort.py::test_partition[[3, 4, 2, 1]-float32-1] -tests/test_sort.py::test_partition[[3, 4, 2, 1]-float64-0] -tests/test_sort.py::test_partition[[3, 4, 2, 1]-float64-1] -tests/test_sort.py::test_partition[[3, 4, 2, 1]-complex64-0] -tests/test_sort.py::test_partition[[3, 4, 2, 1]-complex64-1] -tests/test_sort.py::test_partition[[3, 4, 2, 1]-complex128-0] -tests/test_sort.py::test_partition[[3, 4, 2, 1]-complex128-1] -tests/test_sort.py::test_partition[[[1, 0], [3, 0]]-bool-0] -tests/test_sort.py::test_partition[[[1, 0], [3, 0]]-bool-1] -tests/test_sort.py::test_partition[[[1, 0], [3, 0]]-int32-0] -tests/test_sort.py::test_partition[[[1, 0], [3, 0]]-int32-1] -tests/test_sort.py::test_partition[[[1, 0], [3, 0]]-int64-0] -tests/test_sort.py::test_partition[[[1, 0], [3, 0]]-int64-1] -tests/test_sort.py::test_partition[[[1, 0], [3, 0]]-float32-0] -tests/test_sort.py::test_partition[[[1, 0], [3, 0]]-float32-1] -tests/test_sort.py::test_partition[[[1, 0], [3, 0]]-float64-0] -tests/test_sort.py::test_partition[[[1, 0], [3, 0]]-float64-1] -tests/test_sort.py::test_partition[[[1, 0], [3, 0]]-complex64-0] -tests/test_sort.py::test_partition[[[1, 0], [3, 0]]-complex64-1] -tests/test_sort.py::test_partition[[[1, 0], [3, 0]]-complex128-0] -tests/test_sort.py::test_partition[[[1, 0], [3, 0]]-complex128-1] -tests/test_sort.py::test_partition[[[3, 2], [1, 6]]-bool-0] -tests/test_sort.py::test_partition[[[3, 2], [1, 6]]-bool-1] -tests/test_sort.py::test_partition[[[3, 2], [1, 6]]-int32-0] -tests/test_sort.py::test_partition[[[3, 2], [1, 6]]-int32-1] -tests/test_sort.py::test_partition[[[3, 2], [1, 6]]-int64-0] -tests/test_sort.py::test_partition[[[3, 2], [1, 6]]-int64-1] -tests/test_sort.py::test_partition[[[3, 2], [1, 6]]-float32-0] 
-tests/test_sort.py::test_partition[[[3, 2], [1, 6]]-float32-1] -tests/test_sort.py::test_partition[[[3, 2], [1, 6]]-float64-0] -tests/test_sort.py::test_partition[[[3, 2], [1, 6]]-float64-1] -tests/test_sort.py::test_partition[[[3, 2], [1, 6]]-complex64-0] -tests/test_sort.py::test_partition[[[3, 2], [1, 6]]-complex64-1] -tests/test_sort.py::test_partition[[[3, 2], [1, 6]]-complex128-0] -tests/test_sort.py::test_partition[[[3, 2], [1, 6]]-complex128-1] -tests/test_sort.py::test_partition[[[4, 2, 3], [3, 4, 1]]-bool-0] -tests/test_sort.py::test_partition[[[4, 2, 3], [3, 4, 1]]-bool-1] -tests/test_sort.py::test_partition[[[4, 2, 3], [3, 4, 1]]-int32-0] -tests/test_sort.py::test_partition[[[4, 2, 3], [3, 4, 1]]-int32-1] -tests/test_sort.py::test_partition[[[4, 2, 3], [3, 4, 1]]-int64-0] -tests/test_sort.py::test_partition[[[4, 2, 3], [3, 4, 1]]-int64-1] -tests/test_sort.py::test_partition[[[4, 2, 3], [3, 4, 1]]-float32-0] -tests/test_sort.py::test_partition[[[4, 2, 3], [3, 4, 1]]-float32-1] -tests/test_sort.py::test_partition[[[4, 2, 3], [3, 4, 1]]-float64-0] -tests/test_sort.py::test_partition[[[4, 2, 3], [3, 4, 1]]-float64-1] -tests/test_sort.py::test_partition[[[4, 2, 3], [3, 4, 1]]-complex64-0] -tests/test_sort.py::test_partition[[[4, 2, 3], [3, 4, 1]]-complex64-1] -tests/test_sort.py::test_partition[[[4, 2, 3], [3, 4, 1]]-complex128-0] -tests/test_sort.py::test_partition[[[4, 2, 3], [3, 4, 1]]-complex128-1] -tests/test_sort.py::test_partition[[[[1, -3], [3, 0]], [[5, 2], [0, 1]], [[1, 0], [0, 1]]]-bool-0] -tests/test_sort.py::test_partition[[[[1, -3], [3, 0]], [[5, 2], [0, 1]], [[1, 0], [0, 1]]]-bool-1] -tests/test_sort.py::test_partition[[[[1, -3], [3, 0]], [[5, 2], [0, 1]], [[1, 0], [0, 1]]]-int32-0] -tests/test_sort.py::test_partition[[[[1, -3], [3, 0]], [[5, 2], [0, 1]], [[1, 0], [0, 1]]]-int32-1] -tests/test_sort.py::test_partition[[[[1, -3], [3, 0]], [[5, 2], [0, 1]], [[1, 0], [0, 1]]]-int64-0] -tests/test_sort.py::test_partition[[[[1, -3], [3, 0]], [[5, 
2], [0, 1]], [[1, 0], [0, 1]]]-int64-1] -tests/test_sort.py::test_partition[[[[1, -3], [3, 0]], [[5, 2], [0, 1]], [[1, 0], [0, 1]]]-float32-0] -tests/test_sort.py::test_partition[[[[1, -3], [3, 0]], [[5, 2], [0, 1]], [[1, 0], [0, 1]]]-float32-1] -tests/test_sort.py::test_partition[[[[1, -3], [3, 0]], [[5, 2], [0, 1]], [[1, 0], [0, 1]]]-float64-0] -tests/test_sort.py::test_partition[[[[1, -3], [3, 0]], [[5, 2], [0, 1]], [[1, 0], [0, 1]]]-float64-1] -tests/test_sort.py::test_partition[[[[1, -3], [3, 0]], [[5, 2], [0, 1]], [[1, 0], [0, 1]]]-complex64-0] -tests/test_sort.py::test_partition[[[[1, -3], [3, 0]], [[5, 2], [0, 1]], [[1, 0], [0, 1]]]-complex64-1] -tests/test_sort.py::test_partition[[[[1, -3], [3, 0]], [[5, 2], [0, 1]], [[1, 0], [0, 1]]]-complex128-0] -tests/test_sort.py::test_partition[[[[1, -3], [3, 0]], [[5, 2], [0, 1]], [[1, 0], [0, 1]]]-complex128-1] -tests/test_sort.py::test_partition[[[[[8, 2], [3, 0]], [[5, 2], [0, 1]]], [[[1, 3], [3, 1]], [[5, 2], [0, 1]]]]-bool-0] -tests/test_sort.py::test_partition[[[[[8, 2], [3, 0]], [[5, 2], [0, 1]]], [[[1, 3], [3, 1]], [[5, 2], [0, 1]]]]-bool-1] -tests/test_sort.py::test_partition[[[[[8, 2], [3, 0]], [[5, 2], [0, 1]]], [[[1, 3], [3, 1]], [[5, 2], [0, 1]]]]-int32-0] -tests/test_sort.py::test_partition[[[[[8, 2], [3, 0]], [[5, 2], [0, 1]]], [[[1, 3], [3, 1]], [[5, 2], [0, 1]]]]-int32-1] -tests/test_sort.py::test_partition[[[[[8, 2], [3, 0]], [[5, 2], [0, 1]]], [[[1, 3], [3, 1]], [[5, 2], [0, 1]]]]-int64-0] -tests/test_sort.py::test_partition[[[[[8, 2], [3, 0]], [[5, 2], [0, 1]]], [[[1, 3], [3, 1]], [[5, 2], [0, 1]]]]-int64-1] -tests/test_sort.py::test_partition[[[[[8, 2], [3, 0]], [[5, 2], [0, 1]]], [[[1, 3], [3, 1]], [[5, 2], [0, 1]]]]-float32-0] -tests/test_sort.py::test_partition[[[[[8, 2], [3, 0]], [[5, 2], [0, 1]]], [[[1, 3], [3, 1]], [[5, 2], [0, 1]]]]-float32-1] -tests/test_sort.py::test_partition[[[[[8, 2], [3, 0]], [[5, 2], [0, 1]]], [[[1, 3], [3, 1]], [[5, 2], [0, 1]]]]-float64-0] 
-tests/test_sort.py::test_partition[[[[[8, 2], [3, 0]], [[5, 2], [0, 1]]], [[[1, 3], [3, 1]], [[5, 2], [0, 1]]]]-float64-1] -tests/test_sort.py::test_partition[[[[[8, 2], [3, 0]], [[5, 2], [0, 1]]], [[[1, 3], [3, 1]], [[5, 2], [0, 1]]]]-complex64-0] -tests/test_sort.py::test_partition[[[[[8, 2], [3, 0]], [[5, 2], [0, 1]]], [[[1, 3], [3, 1]], [[5, 2], [0, 1]]]]-complex64-1] -tests/test_sort.py::test_partition[[[[[8, 2], [3, 0]], [[5, 2], [0, 1]]], [[[1, 3], [3, 1]], [[5, 2], [0, 1]]]]-complex128-0] -tests/test_sort.py::test_partition[[[[[8, 2], [3, 0]], [[5, 2], [0, 1]]], [[[1, 3], [3, 1]], [[5, 2], [0, 1]]]]-complex128-1] - -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_0_{external=False, length=10}::test_partition_axis -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_0_{external=False, length=10}::test_partition_invalid_axis1 -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_0_{external=False, length=10}::test_partition_invalid_axis2 -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_0_{external=False, length=10}::test_partition_invalid_kth -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_0_{external=False, length=10}::test_partition_invalid_negative_axis1 -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_0_{external=False, length=10}::test_partition_invalid_negative_axis2 -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_0_{external=False, length=10}::test_partition_invalid_negative_kth -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_0_{external=False, length=10}::test_partition_negative_axis -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_0_{external=False, length=10}::test_partition_negative_kth -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_0_{external=False, length=10}::test_partition_non_contiguous 
-tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_0_{external=False, length=10}::test_partition_one_dim -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_0_{external=False, length=10}::test_partition_sequence_kth -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_0_{external=False, length=10}::test_partition_zero_dim -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_1_{external=False, length=20000}::test_partition_axis -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_1_{external=False, length=20000}::test_partition_invalid_axis1 -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_1_{external=False, length=20000}::test_partition_invalid_axis2 -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_1_{external=False, length=20000}::test_partition_invalid_kth -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_1_{external=False, length=20000}::test_partition_invalid_negative_axis1 -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_1_{external=False, length=20000}::test_partition_invalid_negative_axis2 -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_1_{external=False, length=20000}::test_partition_invalid_negative_kth -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_1_{external=False, length=20000}::test_partition_negative_axis -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_1_{external=False, length=20000}::test_partition_negative_kth -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_1_{external=False, length=20000}::test_partition_non_contiguous -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_1_{external=False, length=20000}::test_partition_one_dim -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_1_{external=False, 
length=20000}::test_partition_sequence_kth -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_1_{external=False, length=20000}::test_partition_zero_dim -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_2_{external=True, length=10}::test_partition_axis -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_2_{external=True, length=10}::test_partition_invalid_axis1 -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_2_{external=True, length=10}::test_partition_invalid_axis2 -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_2_{external=True, length=10}::test_partition_invalid_kth -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_2_{external=True, length=10}::test_partition_invalid_negative_axis1 -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_2_{external=True, length=10}::test_partition_invalid_negative_axis2 -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_2_{external=True, length=10}::test_partition_invalid_negative_kth -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_2_{external=True, length=10}::test_partition_negative_axis -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_2_{external=True, length=10}::test_partition_negative_kth -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_2_{external=True, length=10}::test_partition_non_contiguous -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_2_{external=True, length=10}::test_partition_none_axis -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_2_{external=True, length=10}::test_partition_one_dim -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_2_{external=True, length=10}::test_partition_sequence_kth -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_2_{external=True, 
length=10}::test_partition_zero_dim -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_3_{external=True, length=20000}::test_partition_axis -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_3_{external=True, length=20000}::test_partition_invalid_axis1 -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_3_{external=True, length=20000}::test_partition_invalid_axis2 -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_3_{external=True, length=20000}::test_partition_invalid_kth -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_3_{external=True, length=20000}::test_partition_invalid_negative_axis1 -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_3_{external=True, length=20000}::test_partition_invalid_negative_axis2 -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_3_{external=True, length=20000}::test_partition_invalid_negative_kth -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_3_{external=True, length=20000}::test_partition_negative_axis -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_3_{external=True, length=20000}::test_partition_negative_kth -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_3_{external=True, length=20000}::test_partition_non_contiguous -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_3_{external=True, length=20000}::test_partition_none_axis -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_3_{external=True, length=20000}::test_partition_one_dim -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_3_{external=True, length=20000}::test_partition_sequence_kth -tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_3_{external=True, length=20000}::test_partition_zero_dim diff --git a/dpnp/tests/test_sort.py b/dpnp/tests/test_sort.py index 
31959600a0d7..5e883c575f85 100644 --- a/dpnp/tests/test_sort.py +++ b/dpnp/tests/test_sort.py @@ -11,6 +11,7 @@ get_all_dtypes, get_complex_dtypes, get_float_dtypes, + get_integer_dtypes, ) from .third_party.cupy import testing @@ -275,6 +276,232 @@ def test_v_scalar(self): assert_equal(result, expected) +class TestPartition: + @pytest.mark.parametrize("data", [[2, 1], [1, 2], [1, 1]]) + @pytest.mark.parametrize("kth", [0, 1]) + def test_1d_2size(self, data, kth): + a = numpy.array(data) + ia = dpnp.array(a) + + result = dpnp.partition(ia, kth) + expected = numpy.partition(a, kth) + assert_array_equal(result, expected) + + @pytest.mark.parametrize( + "data", + [ + [3, 2, 1], + [1, 2, 3], + [2, 1, 3], + [2, 3, 1], + [1, 1, 1], + [1, 2, 2], + [2, 2, 1], + [1, 2, 1], + ], + ) + @pytest.mark.parametrize("kth", [0, 1, 2]) + @pytest.mark.parametrize("dt", get_all_dtypes(no_none=True)) + def test_1d_3size(self, data, kth, dt): + a = dpnp.array(data, dtype=dt) + p = dpnp.partition(a, kth) + + assert (p[..., 0:kth] <= p[..., kth : kth + 1]).all() + assert (p[..., kth : kth + 1] <= p[..., kth + 1 :]).all() + + @pytest.mark.parametrize("kth", [6, 16, -6, 41, -16, 31]) + def test_1d_reversed(self, kth): + a = dpnp.arange(47)[::-1] + p = dpnp.partition(a, kth) + + assert (p[..., 0:kth] <= p[..., kth : kth + 1]).all() + assert (p[..., kth : kth + 1] <= p[..., kth + 1 :]).all() + + @pytest.mark.parametrize("val", [4, dpnp.nan]) + def test_1d_ones(self, val): + a = numpy.ones(10) + a[1] = val + ia = dpnp.array(a) + + result = dpnp.partition(ia, (2, -1)) + expected = numpy.partition(a, (2, -1)) + assert_array_equal(result, expected) + + @pytest.mark.parametrize("kth", [0, 3, 19, 20]) + def test_1d_equal_elements(self, kth): + a = dpnp.array( + [ + 0, + 1, + 2, + 3, + 4, + 5, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 9, + ] + ) + p = dpnp.partition(a, kth) + + assert (p[..., 0:kth] <= p[..., kth : kth + 1]).all() + assert (p[..., kth : kth + 
1] <= p[..., kth + 1 :]).all() + + @pytest.mark.parametrize("kth", [(0, 3), (-3, -1)]) + def test_kth_iterative(self, kth): + a = numpy.array([3, 4, 2, 1]) + ia = dpnp.array(a) + + result = dpnp.partition(ia, kth) + expected = numpy.partition(a, kth) + assert_array_equal(result, expected) + + @pytest.mark.parametrize("dt", get_integer_dtypes()) + def test_max_min_int_values(self, dt): + N = 512 + rnd = numpy.random.RandomState(1100710816) + + # random data with min and max values + minv = numpy.iinfo(dt).min + maxv = numpy.iinfo(dt).max + a = rnd.randint(low=minv, high=maxv, size=N, dtype=dt) + i, j = rnd.choice(N, 2, replace=False) + a[i] = minv + a[j] = maxv + k = int(rnd.choice(N, 1)[0]) + + ia = dpnp.array(a) + p = dpnp.partition(ia, k) + assert (p[0:k] <= p[k : k + 1]).all() + assert (p[k : k + 1] <= p[k + 1 :]).all() + + # random data with max value at the end of array + a = rnd.randint(low=minv, high=maxv, size=N, dtype=dt) + a[N - 1] = maxv + + ia = dpnp.array(a) + p = dpnp.partition(ia, k) + assert (p[0:k] <= p[k : k + 1]).all() + assert (p[k : k + 1] <= p[k + 1 :]).all() + + @pytest.mark.parametrize("dt", get_float_dtypes()) + def test_float_values(self, dt): + N = 512 + rnd = numpy.random.RandomState(1100710816) + a = -0.5 + rnd.random(N).astype(dt) + k = int(rnd.choice(N, 1)[0]) + + ia = dpnp.array(a) + p = dpnp.partition(ia, k) + assert (p[0:k] <= p[k : k + 1]).all() + assert (p[k : k + 1] <= p[k + 1 :]).all() + + @pytest.mark.parametrize("axis", [0, -1, None]) + def test_axis_1d(self, axis): + a = numpy.array([2, 1]) + ia = dpnp.array(a) + + result = dpnp.partition(ia, 1, axis=axis) + expected = numpy.partition(a, 1, axis=axis) + assert_array_equal(result, expected) + + @pytest.mark.parametrize("kth, axis", [(1, 0), (4, 1)]) + def test_axis_2d(self, kth, axis): + a = generate_random_numpy_array((2, 5)) + + ia = dpnp.array(a) + ia.partition(kth, axis=axis) + p = dpnp.rollaxis(ia, axis, ia.ndim) + assert (p[..., 0:kth] <= p[..., kth : kth + 1]).all() + 
assert (p[..., kth : kth + 1] <= p[..., kth + 1 :]).all() + + ia = dpnp.array(a) + p = dpnp.partition(ia, kth, axis=axis) + p = dpnp.rollaxis(p, axis, ia.ndim) + assert (p[..., 0:kth] <= p[..., kth : kth + 1]).all() + assert (p[..., kth : kth + 1] <= p[..., kth + 1 :]).all() + + @pytest.mark.parametrize("kth", [1, 9]) + def test_axis_2d_none(self, kth): + a = generate_random_numpy_array((2, 5)) + ia = dpnp.array(a) + + p = dpnp.partition(ia, kth, axis=None) + assert (p[..., 0:kth] <= p[..., kth : kth + 1]).all() + assert (p[..., kth : kth + 1] <= p[..., kth + 1 :]).all() + + @pytest.mark.parametrize("axis", list(range(-4, 4)) + [None]) + def test_empty_array(self, axis): + a = numpy.empty((3, 2, 1, 0)) + ia = dpnp.array(a) + kth = 0 + + result = dpnp.partition(ia, kth, axis=axis) + expected = numpy.partition(a, kth, axis=axis) + assert_equal(result, expected) + + def test_empty_partition(self): + a = numpy.array([0, 2, 4, 6, 8, 10]) + ia = dpnp.array(a) + + ia.partition([]) + assert_array_equal(ia, a) + + @pytest.mark.parametrize("xp", [dpnp, numpy]) + def test_kth_errors(self, xp): + a = xp.arange(10) + assert_raises(ValueError, a.partition, 10) + assert_raises(ValueError, a.partition, -11) + assert_raises(TypeError, a.partition, 9.0) + assert_raises(TypeError, a.partition, [1, 7.0]) + + @pytest.mark.parametrize("xp", [dpnp, numpy]) + def test_kth_axis_errors(self, xp): + a = xp.array([2, 1]) + assert_raises(ValueError, a.partition, 2) + assert_raises(AxisError, a.partition, 3, axis=1) + assert_raises(ValueError, xp.partition, a, 2) + assert_raises(AxisError, xp.partition, a, 2, axis=1) + + a = xp.arange(10).reshape((2, 5)) + assert_raises(ValueError, a.partition, 2, axis=0) + assert_raises(ValueError, a.partition, 11, axis=1) + assert_raises(TypeError, a.partition, 2, axis=None) + assert_raises(ValueError, xp.partition, a, 9, axis=1) + assert_raises(ValueError, xp.partition, a, 11, axis=None) + + @pytest.mark.parametrize("xp", [dpnp, numpy]) + def 
test_kth_iterative_error(self, xp): + a = xp.arange(17) + kth = (0, 1, 2, 429, 231) + assert_raises(ValueError, a.partition, kth) + + a = xp.arange(10).reshape((2, 5)) + assert_raises(ValueError, a.partition, kth, axis=0) + assert_raises(ValueError, a.partition, kth, axis=1) + assert_raises(ValueError, xp.partition, a, kth, axis=1) + assert_raises(ValueError, xp.partition, a, kth, axis=None) + + def test_not_implemented_kwargs(self): + a = dpnp.arange(10) + assert_raises(NotImplementedError, a.partition, 2, kind="nonsense") + assert_raises(NotImplementedError, a.partition, 2, order=[]) + + class TestSort: @pytest.mark.parametrize("kind", [None, "stable", "mergesort", "radixsort"]) @pytest.mark.parametrize("dtype", get_all_dtypes(no_none=True)) @@ -407,40 +634,3 @@ def test_complex(self, dtype): result = dpnp.sort_complex(ia) expected = numpy.sort_complex(a) assert_equal(result, expected) - - -@pytest.mark.parametrize("kth", [0, 1]) -@pytest.mark.parametrize( - "dtype", - get_all_dtypes( - no_none=True, no_unsigned=True, xfail_dtypes=[dpnp.int8, dpnp.int16] - ), -) -@pytest.mark.parametrize( - "array", - [ - [3, 4, 2, 1], - [[1, 0], [3, 0]], - [[3, 2], [1, 6]], - [[4, 2, 3], [3, 4, 1]], - [[[1, -3], [3, 0]], [[5, 2], [0, 1]], [[1, 0], [0, 1]]], - [ - [[[8, 2], [3, 0]], [[5, 2], [0, 1]]], - [[[1, 3], [3, 1]], [[5, 2], [0, 1]]], - ], - ], - ids=[ - "[3, 4, 2, 1]", - "[[1, 0], [3, 0]]", - "[[3, 2], [1, 6]]", - "[[4, 2, 3], [3, 4, 1]]", - "[[[1, -3], [3, 0]], [[5, 2], [0, 1]], [[1, 0], [0, 1]]]", - "[[[[8, 2], [3, 0]], [[5, 2], [0, 1]]], [[[1, 3], [3, 1]], [[5, 2], [0, 1]]]]", - ], -) -def test_partition(array, dtype, kth): - a = dpnp.array(array, dtype) - p = dpnp.partition(a, kth) - - assert (p[..., 0:kth] <= p[..., kth : kth + 1]).all() - assert (p[..., kth : kth + 1] <= p[..., kth + 1 :]).all() diff --git a/dpnp/tests/third_party/cupy/sorting_tests/test_sort.py b/dpnp/tests/third_party/cupy/sorting_tests/test_sort.py index 7e791f6a7c0e..7e0eade13254 100644 --- 
a/dpnp/tests/third_party/cupy/sorting_tests/test_sort.py +++ b/dpnp/tests/third_party/cupy/sorting_tests/test_sort.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import unittest import numpy @@ -455,7 +457,6 @@ def test_sort_complex_nan(self, xp, dtype): } ) ) -@pytest.mark.usefixtures("allow_fall_back_on_numpy") class TestPartition(unittest.TestCase): def partition(self, a, kth, axis=-1): @@ -478,9 +479,6 @@ def test_partition_zero_dim(self): @testing.for_all_dtypes() @testing.numpy_cupy_equal() def test_partition_one_dim(self, xp, dtype): - flag = xp.issubdtype(dtype, xp.unsignedinteger) - if flag or dtype in [xp.int8, xp.int16]: - pytest.skip("dpnp.partition() does not support new integer dtypes.") a = testing.shaped_random((self.length,), xp, dtype) kth = 2 x = self.partition(a, kth) @@ -488,7 +486,6 @@ def test_partition_one_dim(self, xp, dtype): assert xp.all(x[kth : kth + 1] <= x[kth + 1 :]) return x[kth] - @pytest.mark.skip("multidimensional case doesn't work properly") @testing.for_all_dtypes() @testing.numpy_cupy_array_equal() def test_partition_multi_dim(self, xp, dtype): @@ -505,6 +502,12 @@ def test_partition_multi_dim(self, xp, dtype): def test_partition_non_contiguous(self, xp): a = testing.shaped_random((self.length,), xp)[::-1] kth = 2 + # if not self.external: + # if xp is cupy: + # with self.assertRaises(NotImplementedError): + # return self.partition(a, kth) + # return 0 # dummy + # else: x = self.partition(a, kth) assert xp.all(x[0:kth] <= x[kth : kth + 1]) assert xp.all(x[kth : kth + 1] <= x[kth + 1 :]) @@ -607,7 +610,7 @@ def test_partition_invalid_negative_axis2(self): } ) ) -@pytest.mark.skip("not fully supported yet") +@pytest.mark.skip("not supported yet") class TestArgpartition(unittest.TestCase): def argpartition(self, a, kth, axis=-1): From ed2307939eefa1a6e108a888a6c85da6a69dd4ac Mon Sep 17 00:00:00 2001 From: Anton <100830759+antonwolfy@users.noreply.github.com> Date: Tue, 17 Feb 2026 14:16:02 +0100 Subject: [PATCH 16/18] 
Remove unused `onemkl-sycl-stats` run dependency (#2771) There is no real dependency on `onemkl-sycl-stats` package. DPNP doesn't consume any function from it. The PR drops the unnecessary run dependency on this oneMKL conda package. --- CHANGELOG.md | 1 + conda-recipe/meta.yaml | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e98a6763c4a4..f7be91dfd949 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -64,6 +64,7 @@ Also, that release drops support for Python 3.9, making Python 3.10 the minimum * Removed the obsolete interface from DPNP to Numba JIT [#2647](https://github.com/IntelPython/dpnp/pull/2647) * Removed the `newshape` parameter from `dpnp.reshape`, which has been deprecated since dpnp 0.17.0. Pass it positionally or use `shape=` on newer versions [#2670](https://github.com/IntelPython/dpnp/pull/2670) * Removed unused `pytest` configuration from `pyproject.toml` [#2729](https://github.com/IntelPython/dpnp/pull/2729) +* Dropped a conda run dependency on `onemkl-sycl-stats` package [#2771](https://github.com/IntelPython/dpnp/pull/2771) ### Fixed diff --git a/conda-recipe/meta.yaml b/conda-recipe/meta.yaml index 5ee519fc921a..956ff6db0133 100644 --- a/conda-recipe/meta.yaml +++ b/conda-recipe/meta.yaml @@ -49,7 +49,6 @@ requirements: - {{ pin_compatible('onemkl-sycl-dft', min_pin='x.x', max_pin='x') }} - {{ pin_compatible('onemkl-sycl-lapack', min_pin='x.x', max_pin='x') }} - {{ pin_compatible('onemkl-sycl-rng', min_pin='x.x', max_pin='x') }} - - {{ pin_compatible('onemkl-sycl-stats', min_pin='x.x', max_pin='x') }} - {{ pin_compatible('onemkl-sycl-vm', min_pin='x.x', max_pin='x') }} - numpy - intel-gpu-ocl-icd-system From c73e47269cb0a3013076512efb0f29d05be104b1 Mon Sep 17 00:00:00 2001 From: Anton <100830759+antonwolfy@users.noreply.github.com> Date: Tue, 17 Feb 2026 15:55:56 +0100 Subject: [PATCH 17/18] Use Pybind11 `3.0.2` to build dpnp (#2773) The PR updates CMakeLists.txt to pull pybind11 `3.0.2` up from 
`3.0.1`. --- CHANGELOG.md | 1 + CMakeLists.txt | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f7be91dfd949..23c42202e224 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -50,6 +50,7 @@ Also, that release drops support for Python 3.9, making Python 3.10 the minimum * Aligned `strides` property of `dpnp.ndarray` with NumPy and CuPy implementations [#2747](https://github.com/IntelPython/dpnp/pull/2747) * Extended `dpnp.nan_to_num` to support broadcasting of `nan`, `posinf`, and `neginf` keywords [#2754](https://github.com/IntelPython/dpnp/pull/2754) * Changed `dpnp.partition` implementation to reuse `dpnp.sort` where it brings the performance benefit [#2766](https://github.com/IntelPython/dpnp/pull/2766) +* `dpnp` uses pybind11 3.0.2 [#2773](https://github.com/IntelPython/dpnp/pull/2773) ### Deprecated diff --git a/CMakeLists.txt b/CMakeLists.txt index 6ad364b2a272..58ba34082be2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -95,8 +95,8 @@ find_package(Python 3.10...<3.15 REQUIRED COMPONENTS Development.Module NumPy) include(FetchContent) FetchContent_Declare( pybind11 - URL https://github.com/pybind/pybind11/archive/refs/tags/v3.0.1.tar.gz - URL_HASH SHA256=741633da746b7c738bb71f1854f957b9da660bcd2dce68d71949037f0969d0ca + URL https://github.com/pybind/pybind11/archive/refs/tags/v3.0.2.tar.gz + URL_HASH SHA256=2f20a0af0b921815e0e169ea7fec63909869323581b89d7de1553468553f6a2d FIND_PACKAGE_ARGS NAMES pybind11 ) FetchContent_MakeAvailable(pybind11) From 30f5fd126d3ed92c9ba356bde925a3979c5acadd Mon Sep 17 00:00:00 2001 From: Anton <100830759+antonwolfy@users.noreply.github.com> Date: Tue, 17 Feb 2026 18:31:47 +0100 Subject: [PATCH 18/18] Suppress the compiler warnings marking them as external (#2770) On Linux, the DPC++ compiler headers are automatically treated as system headers. But it is not the case on Windows.
Due to that, there is a ton of deprecation warnings generated inside the compiler headers when building dpnp extensions on Windows. This PR updates CMake files of the extensions to add include of DPC++ compiler headers explicitly and to mark the compiler and dpctl headers as system ones to suppress the warnings inside them. --- CHANGELOG.md | 1 + dpnp/backend/extensions/blas/CMakeLists.txt | 10 +++++----- dpnp/backend/extensions/fft/CMakeLists.txt | 11 +++-------- dpnp/backend/extensions/indexing/CMakeLists.txt | 10 +++++----- dpnp/backend/extensions/lapack/CMakeLists.txt | 10 +++++----- dpnp/backend/extensions/statistics/CMakeLists.txt | 14 +++++--------- dpnp/backend/extensions/ufunc/CMakeLists.txt | 10 +++++----- dpnp/backend/extensions/vm/CMakeLists.txt | 10 +++++----- dpnp/backend/extensions/window/CMakeLists.txt | 10 +++++----- 9 files changed, 39 insertions(+), 47 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 23c42202e224..f177be311f84 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -51,6 +51,7 @@ Also, that release drops support for Python 3.9, making Python 3.10 the minimum * Extended `dpnp.nan_to_num` to support broadcasting of `nan`, `posinf`, and `neginf` keywords [#2754](https://github.com/IntelPython/dpnp/pull/2754) * Changed `dpnp.partition` implementation to reuse `dpnp.sort` where it brings the performance benefit [#2766](https://github.com/IntelPython/dpnp/pull/2766) * `dpnp` uses pybind11 3.0.2 [#2773](https://github.com/IntelPython/dpnp/pull/2773) +* Modified CMake files for the extension to explicitly mark DPC++ compiler and dpctl headers as system ones and so to suppress the build warning generated inside them [#2770](https://github.com/IntelPython/dpnp/pull/2770) ### Deprecated diff --git a/dpnp/backend/extensions/blas/CMakeLists.txt b/dpnp/backend/extensions/blas/CMakeLists.txt index 267567c69e71..5960dfcd8028 100644 --- a/dpnp/backend/extensions/blas/CMakeLists.txt +++ b/dpnp/backend/extensions/blas/CMakeLists.txt @@ -65,16
+65,16 @@ set_target_properties( target_include_directories( ${python_module_name} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../ + PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../common ) + +# treat below headers as system to suppress the warnings there during the build target_include_directories( ${python_module_name} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../common + SYSTEM + PRIVATE ${SYCL_INCLUDE_DIR} ${Dpctl_INCLUDE_DIRS} ${Dpctl_TENSOR_INCLUDE_DIR} ) -target_include_directories(${python_module_name} PUBLIC ${Dpctl_INCLUDE_DIRS}) -target_include_directories(${python_module_name} PUBLIC ${Dpctl_TENSOR_INCLUDE_DIR}) - if(WIN32) target_compile_options( ${python_module_name} diff --git a/dpnp/backend/extensions/fft/CMakeLists.txt b/dpnp/backend/extensions/fft/CMakeLists.txt index 50468857e3b9..f8f63dd7fd3b 100644 --- a/dpnp/backend/extensions/fft/CMakeLists.txt +++ b/dpnp/backend/extensions/fft/CMakeLists.txt @@ -57,17 +57,12 @@ set_target_properties( PROPERTIES CMAKE_POSITION_INDEPENDENT_CODE ON ) +# treat below headers as system to suppress the warnings there during the build target_include_directories( ${python_module_name} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../include + SYSTEM + PRIVATE ${SYCL_INCLUDE_DIR} ${Dpctl_INCLUDE_DIRS} ${Dpctl_TENSOR_INCLUDE_DIR} ) -target_include_directories( - ${python_module_name} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../src -) - -target_include_directories(${python_module_name} PUBLIC ${Dpctl_INCLUDE_DIRS}) -target_include_directories(${python_module_name} PUBLIC ${Dpctl_TENSOR_INCLUDE_DIR}) if(WIN32) target_compile_options( diff --git a/dpnp/backend/extensions/indexing/CMakeLists.txt b/dpnp/backend/extensions/indexing/CMakeLists.txt index a6691f31f559..370d59f95585 100644 --- a/dpnp/backend/extensions/indexing/CMakeLists.txt +++ b/dpnp/backend/extensions/indexing/CMakeLists.txt @@ -62,16 +62,16 @@ set_target_properties( target_include_directories( ${python_module_name} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../ + PRIVATE 
${CMAKE_CURRENT_SOURCE_DIR}/../common ) + +# treat below headers as system to suppress the warnings there during the build target_include_directories( ${python_module_name} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../common + SYSTEM + PRIVATE ${SYCL_INCLUDE_DIR} ${Dpctl_INCLUDE_DIRS} ${Dpctl_TENSOR_INCLUDE_DIR} ) -target_include_directories(${python_module_name} PUBLIC ${Dpctl_INCLUDE_DIR}) -target_include_directories(${python_module_name} PUBLIC ${Dpctl_TENSOR_INCLUDE_DIR}) - if(WIN32) target_compile_options( ${python_module_name} diff --git a/dpnp/backend/extensions/lapack/CMakeLists.txt b/dpnp/backend/extensions/lapack/CMakeLists.txt index 5e8b95963e94..6dee8abebeca 100644 --- a/dpnp/backend/extensions/lapack/CMakeLists.txt +++ b/dpnp/backend/extensions/lapack/CMakeLists.txt @@ -82,16 +82,16 @@ set_target_properties( target_include_directories( ${python_module_name} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../ + PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../common ) + +# treat below headers as system to suppress the warnings there during the build target_include_directories( ${python_module_name} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../common + SYSTEM + PRIVATE ${SYCL_INCLUDE_DIR} ${Dpctl_INCLUDE_DIRS} ${Dpctl_TENSOR_INCLUDE_DIR} ) -target_include_directories(${python_module_name} PUBLIC ${Dpctl_INCLUDE_DIR}) -target_include_directories(${python_module_name} PUBLIC ${Dpctl_TENSOR_INCLUDE_DIR}) - if(WIN32) target_compile_options( ${python_module_name} diff --git a/dpnp/backend/extensions/statistics/CMakeLists.txt b/dpnp/backend/extensions/statistics/CMakeLists.txt index 9561daf27ce2..7ccb05238ae4 100644 --- a/dpnp/backend/extensions/statistics/CMakeLists.txt +++ b/dpnp/backend/extensions/statistics/CMakeLists.txt @@ -67,20 +67,16 @@ set_target_properties( target_include_directories( ${python_module_name} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../include -) -target_include_directories( - ${python_module_name} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../src + PRIVATE 
${CMAKE_CURRENT_SOURCE_DIR}/../common ) + +# treat below headers as system to suppress the warnings there during the build target_include_directories( ${python_module_name} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../common + SYSTEM + PRIVATE ${SYCL_INCLUDE_DIR} ${Dpctl_INCLUDE_DIRS} ${Dpctl_TENSOR_INCLUDE_DIR} ) -target_include_directories(${python_module_name} PUBLIC ${Dpctl_INCLUDE_DIR}) -target_include_directories(${python_module_name} PUBLIC ${Dpctl_TENSOR_INCLUDE_DIR}) - if(WIN32) target_compile_options( ${python_module_name} diff --git a/dpnp/backend/extensions/ufunc/CMakeLists.txt b/dpnp/backend/extensions/ufunc/CMakeLists.txt index b24d5d131cfe..ae6015e11d0f 100644 --- a/dpnp/backend/extensions/ufunc/CMakeLists.txt +++ b/dpnp/backend/extensions/ufunc/CMakeLists.txt @@ -84,16 +84,16 @@ set_target_properties( target_include_directories( ${python_module_name} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../ + PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../ ${CMAKE_CURRENT_SOURCE_DIR}/../common ) + +# treat below headers as system to suppress the warnings there during the build target_include_directories( ${python_module_name} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../common + SYSTEM + PRIVATE ${SYCL_INCLUDE_DIR} ${Dpctl_INCLUDE_DIRS} ${Dpctl_TENSOR_INCLUDE_DIR} ) -target_include_directories(${python_module_name} PUBLIC ${Dpctl_INCLUDE_DIR}) -target_include_directories(${python_module_name} PUBLIC ${Dpctl_TENSOR_INCLUDE_DIR}) - if(_dpnp_sycl_targets) # make fat binary target_compile_options( diff --git a/dpnp/backend/extensions/vm/CMakeLists.txt b/dpnp/backend/extensions/vm/CMakeLists.txt index 0e3a17df77e0..7165f7b926fb 100644 --- a/dpnp/backend/extensions/vm/CMakeLists.txt +++ b/dpnp/backend/extensions/vm/CMakeLists.txt @@ -107,16 +107,16 @@ set_target_properties( target_include_directories( ${python_module_name} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../ + PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../common ) + +# treat below headers as system to suppress the warnings there 
during the build target_include_directories( ${python_module_name} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../common + SYSTEM + PRIVATE ${SYCL_INCLUDE_DIR} ${Dpctl_INCLUDE_DIRS} ${Dpctl_TENSOR_INCLUDE_DIR} ) -target_include_directories(${python_module_name} PUBLIC ${Dpctl_INCLUDE_DIR}) -target_include_directories(${python_module_name} PUBLIC ${Dpctl_TENSOR_INCLUDE_DIR}) - if(WIN32) target_compile_options( ${python_module_name} diff --git a/dpnp/backend/extensions/window/CMakeLists.txt b/dpnp/backend/extensions/window/CMakeLists.txt index fc446f523e74..0cebfe79b2de 100644 --- a/dpnp/backend/extensions/window/CMakeLists.txt +++ b/dpnp/backend/extensions/window/CMakeLists.txt @@ -62,16 +62,16 @@ set_target_properties( target_include_directories( ${python_module_name} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../ + PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../ ${CMAKE_CURRENT_SOURCE_DIR}/../common ) + +# treat below headers as system to suppress the warnings there during the build target_include_directories( ${python_module_name} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../common + SYSTEM + PRIVATE ${SYCL_INCLUDE_DIR} ${Dpctl_INCLUDE_DIRS} ${Dpctl_TENSOR_INCLUDE_DIR} ) -target_include_directories(${python_module_name} PUBLIC ${Dpctl_INCLUDE_DIR}) -target_include_directories(${python_module_name} PUBLIC ${Dpctl_TENSOR_INCLUDE_DIR}) - if(WIN32) target_compile_options( ${python_module_name}