Skip to content

Commit 462ba2b

Browse files
author
Rafał Hibner
committed
Filter node with not null assertion
1 parent f6bfa7b commit 462ba2b

File tree

2 files changed

+32
-3
lines changed

2 files changed

+32
-3
lines changed

cpp/src/arrow/acero/filter_node.cc

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,11 @@ namespace arrow {
3333

3434
using internal::checked_cast;
3535

36+
using compute::and_;
37+
using compute::field_ref;
3638
using compute::FilterOptions;
39+
using compute::is_null;
40+
using compute::not_;
3741

3842
namespace acero {
3943
namespace {
@@ -51,14 +55,34 @@ class FilterNode : public MapNode {
5155
auto schema = inputs[0]->output_schema();
5256

5357
const auto& filter_options = checked_cast<const FilterNodeOptions&>(options);
58+
Expression filter_expression;
59+
if (!filter_options.filter_not_null.empty()) {
60+
std::vector<Expression> operands = {filter_options.filter_expression};
61+
for (const auto& c : filter_options.filter_not_null) {
62+
auto field_index = schema->GetFieldIndex(c);
63+
if (field_index == -1) {
64+
return Status::KeyError("Column not found: ", c);
65+
}
66+
auto& field = schema->field(field_index);
67+
if (field->nullable()) {
68+
ARROW_ASSIGN_OR_RAISE(
69+
schema, schema->SetField(field_index, field->WithNullable(false)));
70+
}
71+
operands.push_back(not_(is_null(field_ref(c))));
72+
}
73+
filter_expression = and_(operands);
74+
} else {
75+
filter_expression = filter_options.filter_expression;
76+
}
5477

55-
auto filter_expression = filter_options.filter_expression;
5678
if (!filter_expression.IsBound()) {
5779
ARROW_ASSIGN_OR_RAISE(
5880
filter_expression,
5981
filter_expression.Bind(*schema, plan->query_context()->exec_context()));
6082
}
6183

84+
std::vector<std::string> filter_not_null;
85+
6286
if (filter_expression.type()->id() != Type::BOOL) {
6387
return Status::TypeError("Filter expression must evaluate to bool, but ",
6488
filter_expression.ToString(), " evaluates to ",

cpp/src/arrow/acero/options.h

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -250,13 +250,18 @@ class ARROW_ACERO_EXPORT RecordBatchSourceNodeOptions
250250
class ARROW_ACERO_EXPORT FilterNodeOptions : public ExecNodeOptions {
251251
public:
252252
/// \brief create an instance from values
253-
explicit FilterNodeOptions(Expression filter_expression)
254-
: filter_expression(std::move(filter_expression)) {}
253+
explicit FilterNodeOptions(Expression filter_expression,
254+
std::vector<std::string> filter_not_null = {})
255+
: filter_expression(std::move(filter_expression)),
256+
filter_not_null(std::move(filter_not_null)) {}
255257

256258
/// \brief the expression to filter batches
257259
///
258260
/// The return type of this expression must be boolean
259261
Expression filter_expression;
262+
263+
/// \brief filter out null values from selected columns and assert not null in schema
264+
std::vector<std::string> filter_not_null;
260265
};
261266

262267
/// \brief a node which selects a specified subset from the input

0 commit comments

Comments
 (0)