Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ def filter(self, sample: da.Array):
lambda x: ((da.count_nonzero(x == self._value) / math.prod(x.shape)) * 100) >= self._percentage
) # noqa

if not function(sample):
if function(sample):
raise PipelineFilterException(sample, f"Data contained more than {self._percentage}% of {self._value}.")


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ def filter(self, sample: np.ndarray):
lambda x: ((np.count_nonzero(x == self._value) / math.prod(x.shape)) * 100) >= self._percentage
) # noqa

if not function(sample):
if function(sample):
raise PipelineFilterException(sample, f"Data contained more than {self._percentage}% of {self._value}.")


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ def filter(self, sample: T):
else:
raise TypeError("This filter only accepts xr.DataArray or xr.Dataset")

if not drop:
if drop:
raise PipelineFilterException(sample, f"Data contained more than {self._percentage}% of {self._value}.")


Expand Down
20 changes: 10 additions & 10 deletions packages/pipeline/tests/operations/dask/test_dask_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,24 +62,24 @@ def test_DropValue():

original = da.from_array([[0, 0], [1, 2]])

# drop case (num zeros < threshold)
# non-drop case (num zeros < threshold)
drop = filters.DropValue(0, 75)
with pytest.raises(PipelineFilterException):
drop.filter(original)
drop.filter(original)

# non-drop case (num zeros >= threshold)
# drop case (num zeros >= threshold)
drop = filters.DropValue(0, 50)
drop.filter(original)
with pytest.raises(PipelineFilterException):
drop.filter(original)

# drop case (num nans < threshold)
# non-drop case (num nans < threshold)
original = da.from_array([[np.nan, np.nan], [1, 2]])
drop = filters.DropValue("nan", 75)
with pytest.raises(PipelineFilterException):
drop.filter(original)
drop.filter(original)

# non-drop case (num nans >= threshold)
# drop case (num nans >= threshold)
drop = filters.DropValue("nan", 50)
drop.filter(original)
with pytest.raises(PipelineFilterException):
drop.filter(original)


def test_Shape():
Expand Down
18 changes: 9 additions & 9 deletions packages/pipeline/tests/operations/numpy/test_numpy_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,22 +64,22 @@ def test_DropValue():

drop = filters.DropValue(value=1, percentage=75)

with pytest.raises(PipelineFilterException):
drop.filter(original)
drop.filter(original)

# test no drop case
# test drop case
drop = filters.DropValue(value=1, percentage=50)
drop.filter(original)
with pytest.raises(PipelineFilterException):
drop.filter(original)

# test with nan - drop case
# test with nan - non-drop case
drop = filters.DropValue(value="nan", percentage=75)

with pytest.raises(PipelineFilterException):
drop.filter(original)
drop.filter(original)

# no drop case
# drop case
drop = filters.DropValue(value="nan", percentage=50)
drop.filter(original)
with pytest.raises(PipelineFilterException):
drop.filter(original)


def test_Shape():
Expand Down
34 changes: 17 additions & 17 deletions packages/pipeline/tests/operations/xarray/test_xarray_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,42 +97,42 @@ def test_DropValue():
{"var1": xr.DataArray(np.array([[1, 1], [3, 4]])), "var2": xr.DataArray(np.array([[np.nan, np.nan], [6, 7]]))}
)

# check var1 of dataset drop case
# check var1 of dataset non-drop case
drop = filters.DropValue(1, 75)
with pytest.raises(PipelineFilterException):
drop.filter(original["var1"])
drop.filter(original["var1"])

# check var1 of dataset non-drop case
# check var1 of dataset drop case
drop = filters.DropValue(1, 50)
drop.filter(original["var1"])
with pytest.raises(PipelineFilterException):
drop.filter(original["var1"])

# check var2 of dataset drop case (using nan)
# check var2 of dataset non-drop case (using nan)
drop = filters.DropValue("nan", 75)
drop.filter(original["var2"])

# check var2 of dataset drop case
drop = filters.DropValue("nan", 50)
with pytest.raises(PipelineFilterException):
drop.filter(original["var2"])

# check var2 of dataset non-drop case
drop = filters.DropValue("nan", 50)
drop.filter(original["var2"])
# check whole dataset non-drop case
drop = filters.DropValue(1, 50)
drop.filter(original)

# check whole dataset drop case
drop = filters.DropValue(1, 50)
drop = filters.DropValue(1, 10)
with pytest.raises(PipelineFilterException):
drop.filter(original)

# check whole dataset non-drop case
drop = filters.DropValue(1, 10)
# check whole dataset nan non-drop case
drop = filters.DropValue("nan", 50)
drop.filter(original)

# check whole dataset nan drop case
drop = filters.DropValue("nan", 50)
drop = filters.DropValue("nan", 10)
with pytest.raises(PipelineFilterException):
drop.filter(original)

# check whole dataset nan non-drop case
drop = filters.DropValue("nan", 10)
drop.filter(original)

# check invalid type
with pytest.raises(TypeError):
drop.filter(np.empty((1, 1)))
Expand Down