From 0498903afd5d6ae2f01c1b0b7e278ac0279fb150 Mon Sep 17 00:00:00 2001 From: Edward Yang Date: Thu, 8 Jan 2026 11:08:27 +1100 Subject: [PATCH] reverse logic for DropValue filters This aligns the logic with the docstrings. Now a PipelineFilterException is raised when the percentage of input elements matching the filter value is greater than or equal to the supplied percentage threshold. Before, the exception was raised when that percentage was strictly less than the supplied threshold. --- .../pipeline/operations/dask/filters.py | 2 +- .../pipeline/operations/numpy/filters.py | 2 +- .../pipeline/operations/xarray/filters.py | 2 +- .../tests/operations/dask/test_dask_filter.py | 20 +++++------ .../operations/numpy/test_numpy_filter.py | 18 +++++----- .../operations/xarray/test_xarray_filter.py | 34 +++++++++---------- 6 files changed, 39 insertions(+), 39 deletions(-) diff --git a/packages/pipeline/src/pyearthtools/pipeline/operations/dask/filters.py b/packages/pipeline/src/pyearthtools/pipeline/operations/dask/filters.py index 756edbfc..05b7bfee 100644 --- a/packages/pipeline/src/pyearthtools/pipeline/operations/dask/filters.py +++ b/packages/pipeline/src/pyearthtools/pipeline/operations/dask/filters.py @@ -133,7 +133,7 @@ def filter(self, sample: da.Array): lambda x: ((da.count_nonzero(x == self._value) / math.prod(x.shape)) * 100) >= self._percentage ) # noqa - if not function(sample): + if function(sample): raise PipelineFilterException(sample, f"Data contained more than {self._percentage}% of {self._value}.") diff --git a/packages/pipeline/src/pyearthtools/pipeline/operations/numpy/filters.py b/packages/pipeline/src/pyearthtools/pipeline/operations/numpy/filters.py index 03146787..7b5ff64d 100644 --- a/packages/pipeline/src/pyearthtools/pipeline/operations/numpy/filters.py +++ b/packages/pipeline/src/pyearthtools/pipeline/operations/numpy/filters.py @@ -135,7 +135,7 @@ def filter(self, sample: np.ndarray): lambda x: ((np.count_nonzero(x == self._value) / math.prod(x.shape)) * 
100) >= self._percentage ) # noqa - if not function(sample): + if function(sample): raise PipelineFilterException(sample, f"Data contained more than {self._percentage}% of {self._value}.") diff --git a/packages/pipeline/src/pyearthtools/pipeline/operations/xarray/filters.py b/packages/pipeline/src/pyearthtools/pipeline/operations/xarray/filters.py index 58c59b93..26fde183 100644 --- a/packages/pipeline/src/pyearthtools/pipeline/operations/xarray/filters.py +++ b/packages/pipeline/src/pyearthtools/pipeline/operations/xarray/filters.py @@ -185,7 +185,7 @@ def filter(self, sample: T): else: raise TypeError("This filter only accepts xr.DataArray or xr.Dataset") - if not drop: + if drop: raise PipelineFilterException(sample, f"Data contained more than {self._percentage}% of {self._value}.") diff --git a/packages/pipeline/tests/operations/dask/test_dask_filter.py b/packages/pipeline/tests/operations/dask/test_dask_filter.py index 4af1895a..dc97c9d1 100644 --- a/packages/pipeline/tests/operations/dask/test_dask_filter.py +++ b/packages/pipeline/tests/operations/dask/test_dask_filter.py @@ -62,24 +62,24 @@ def test_DropValue(): original = da.from_array([[0, 0], [1, 2]]) - # drop case (num zeros < threshold) + # non-drop case (num zeros < threshold) drop = filters.DropValue(0, 75) - with pytest.raises(PipelineFilterException): - drop.filter(original) + drop.filter(original) - # non-drop case (num zeros >= threshold) + # drop case (num zeros >= threshold) drop = filters.DropValue(0, 50) - drop.filter(original) + with pytest.raises(PipelineFilterException): + drop.filter(original) - # drop case (num nans < threshold) + # non-drop case (num nans < threshold) original = da.from_array([[np.nan, np.nan], [1, 2]]) drop = filters.DropValue("nan", 75) - with pytest.raises(PipelineFilterException): - drop.filter(original) + drop.filter(original) - # non-drop case (num nans >= threshold) + # drop case (num nans >= threshold) drop = filters.DropValue("nan", 50) - drop.filter(original) 
+ with pytest.raises(PipelineFilterException): + drop.filter(original) def test_Shape(): diff --git a/packages/pipeline/tests/operations/numpy/test_numpy_filter.py b/packages/pipeline/tests/operations/numpy/test_numpy_filter.py index b9f5e330..374f873f 100644 --- a/packages/pipeline/tests/operations/numpy/test_numpy_filter.py +++ b/packages/pipeline/tests/operations/numpy/test_numpy_filter.py @@ -64,22 +64,22 @@ def test_DropValue(): drop = filters.DropValue(value=1, percentage=75) - with pytest.raises(PipelineFilterException): - drop.filter(original) + drop.filter(original) - # test no drop case + # test drop case drop = filters.DropValue(value=1, percentage=50) - drop.filter(original) + with pytest.raises(PipelineFilterException): + drop.filter(original) - # test with nan - drop case + # test with nan - non-drop case drop = filters.DropValue(value="nan", percentage=75) - with pytest.raises(PipelineFilterException): - drop.filter(original) + drop.filter(original) - # no drop case + # drop case drop = filters.DropValue(value="nan", percentage=50) - drop.filter(original) + with pytest.raises(PipelineFilterException): + drop.filter(original) def test_Shape(): diff --git a/packages/pipeline/tests/operations/xarray/test_xarray_filter.py b/packages/pipeline/tests/operations/xarray/test_xarray_filter.py index 21cb8fd6..be2f482c 100644 --- a/packages/pipeline/tests/operations/xarray/test_xarray_filter.py +++ b/packages/pipeline/tests/operations/xarray/test_xarray_filter.py @@ -97,42 +97,42 @@ def test_DropValue(): {"var1": xr.DataArray(np.array([[1, 1], [3, 4]])), "var2": xr.DataArray(np.array([[np.nan, np.nan], [6, 7]]))} ) - # check var1 of dataset drop case + # check var1 of dataset non-drop case drop = filters.DropValue(1, 75) - with pytest.raises(PipelineFilterException): - drop.filter(original["var1"]) + drop.filter(original["var1"]) - # check var1 of dataset non-drop case + # check var1 of dataset drop case drop = filters.DropValue(1, 50) - 
drop.filter(original["var1"]) + with pytest.raises(PipelineFilterException): + drop.filter(original["var1"]) - # check var2 of dataset drop case (using nan) + # check var2 of dataset non-drop case (using nan) drop = filters.DropValue("nan", 75) + drop.filter(original["var2"]) + + # check var2 of dataset drop case + drop = filters.DropValue("nan", 50) with pytest.raises(PipelineFilterException): drop.filter(original["var2"]) - # check var2 of dataset non-drop case - drop = filters.DropValue("nan", 50) - drop.filter(original["var2"]) + # check whole dataset non-drop case + drop = filters.DropValue(1, 50) + drop.filter(original) # check whole dataset drop case - drop = filters.DropValue(1, 50) + drop = filters.DropValue(1, 10) with pytest.raises(PipelineFilterException): drop.filter(original) - # check whole dataset non-drop case - drop = filters.DropValue(1, 10) + # check whole dataset nan non-drop case + drop = filters.DropValue("nan", 50) drop.filter(original) # check whole dataset nan drop case - drop = filters.DropValue("nan", 50) + drop = filters.DropValue("nan", 10) with pytest.raises(PipelineFilterException): drop.filter(original) - # check whole dataset nan non-drop case - drop = filters.DropValue("nan", 10) - drop.filter(original) - # check invalid type with pytest.raises(TypeError): drop.filter(np.empty((1, 1)))