Skip to content

Commit a5acbaa

Browse files
author
Innocent
committed
feat: add json serde for expressions
1 parent 43b83c5 commit a5acbaa

File tree

4 files changed

+516
-30
lines changed

4 files changed

+516
-30
lines changed

src/iceberg/expression/json_serde.cc

Lines changed: 273 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -17,22 +17,33 @@
1717
* under the License.
1818
*/
1919

20-
#include <format>
2120
#include <ranges>
2221
#include <string>
23-
#include <utility>
2422
#include <vector>
2523

2624
#include <nlohmann/json.hpp>
2725

2826
#include "iceberg/expression/json_serde_internal.h"
2927
#include "iceberg/expression/literal.h"
28+
#include "iceberg/expression/predicate.h"
29+
#include "iceberg/expression/term.h"
30+
#include "iceberg/transform.h"
3031
#include "iceberg/util/checked_cast.h"
3132
#include "iceberg/util/json_util_internal.h"
3233
#include "iceberg/util/macros.h"
34+
#include "iceberg/util/transform_util.h"
3335

3436
namespace iceberg {
3537
namespace {
38+
// JSON field names: the object keys read and written by the expression
// (de)serialization routines in this file. Note that kTransform is used both
// as a key (holding the transform's string form) and as the value of the
// "type" field that marks a term as a transform term.
39+
constexpr std::string_view kType = "type";
40+
constexpr std::string_view kTerm = "term";
41+
constexpr std::string_view kTransform = "transform";
42+
constexpr std::string_view kValue = "value";
43+
constexpr std::string_view kValues = "values";
44+
constexpr std::string_view kLeft = "left";
45+
constexpr std::string_view kRight = "right";
46+
constexpr std::string_view kChild = "child";
3647
// Expression type strings
3748
constexpr std::string_view kTypeTrue = "true";
3849
constexpr std::string_view kTypeFalse = "false";
@@ -58,6 +69,43 @@ constexpr std::string_view kTypeCountNull = "count-null";
5869
constexpr std::string_view kTypeCountStar = "count-star";
5970
constexpr std::string_view kTypeMin = "min";
6071
constexpr std::string_view kTypeMax = "max";
72+
73+
/// Helper to check if a JSON term represents a transform
74+
bool IsTransformTerm(const nlohmann::json& json) {
75+
return json.is_object() && json.contains(kType) &&
76+
json[kType].get<std::string>() == kTransform && json.contains(kTerm);
77+
}
78+
79+
/// Template helper to create predicates from JSON with the appropriate term type
80+
template <typename B>
81+
Result<std::unique_ptr<UnboundPredicate>> MakePredicateFromJson(
82+
Expression::Operation op, std::shared_ptr<UnboundTerm<B>> term,
83+
const nlohmann::json& json) {
84+
if (IsUnaryOperation(op)) {
85+
return UnboundPredicateImpl<B>::Make(op, std::move(term));
86+
}
87+
88+
if (IsSetOperation(op)) {
89+
std::vector<Literal> literals;
90+
if (!json.contains(kValues) || !json[kValues].is_array()) [[unlikely]] {
91+
return JsonParseError("Missing or invalid 'values' field for set operation: {}",
92+
SafeDumpJson(json));
93+
}
94+
for (const auto& val : json[kValues]) {
95+
ICEBERG_ASSIGN_OR_RAISE(auto lit, LiteralFromJson(val));
96+
literals.push_back(std::move(lit));
97+
}
98+
return UnboundPredicateImpl<B>::Make(op, std::move(term), std::move(literals));
99+
}
100+
101+
// Literal predicate
102+
if (!json.contains(kValue)) [[unlikely]] {
103+
return JsonParseError("Missing 'value' field for literal predicate: {}",
104+
SafeDumpJson(json));
105+
}
106+
ICEBERG_ASSIGN_OR_RAISE(auto literal, LiteralFromJson(json[kValue]));
107+
return UnboundPredicateImpl<B>::Make(op, std::move(term), std::move(literal));
108+
}
61109
} // namespace
62110

63111
bool IsUnaryOperation(Expression::Operation op) {
@@ -83,7 +131,7 @@ bool IsSetOperation(Expression::Operation op) {
83131
}
84132

85133
Result<Expression::Operation> OperationTypeFromJson(const nlohmann::json& json) {
86-
if (!json.is_string()) {
134+
if (!json.is_string()) [[unlikely]] {
87135
return JsonParseError("Unable to create operation. Json value is not a string");
88136
}
89137
auto typeStr = json.get<std::string>();
@@ -123,27 +171,243 @@ nlohmann::json ToJson(Expression::Operation op) {
123171
return json;
124172
}
125173

174+
/// Serialize a named reference as a bare JSON value built from its name.
nlohmann::json ToJson(const NamedReference& ref) {
  nlohmann::json name_json = ref.name();
  return name_json;
}
175+
176+
/// Parse a named reference from JSON.
///
/// The node must be a JSON string; its content is passed to
/// NamedReference::Make. Any other JSON type yields a parse error.
Result<std::unique_ptr<NamedReference>> NamedReferenceFromJson(
    const nlohmann::json& json) {
  if (json.is_string()) [[likely]] {
    return NamedReference::Make(json.get<std::string>());
  }
  return JsonParseError("Expected string for named reference");
}
183+
184+
/// Serialize an unbound transform term as a JSON object with three fields:
/// "type" (always "transform"), "transform" (the transform's ToString()
/// output) and "term" (the name of the referenced column).
nlohmann::json ToJson(const UnboundTransform& transform) {
  // reference() is reached through a non-const alias of the argument.
  // NOTE(review): presumably the accessor is not const-qualified — confirm
  // against the UnboundTransform declaration before removing the cast.
  auto& non_const_self = const_cast<UnboundTransform&>(transform);

  nlohmann::json result;
  result[kType] = kTransform;
  result[kTransform] = transform.transform()->ToString();
  result[kTerm] = non_const_self.reference()->name();
  return result;
}
192+
193+
/// Parse an unbound transform term from JSON.
///
/// The node must satisfy IsTransformTerm; otherwise a parse error containing
/// the dumped JSON is returned. The "transform" string is resolved with
/// TransformFromString and the "term" field with NamedReferenceFromJson; an
/// error from either step is propagated.
Result<std::unique_ptr<UnboundTransform>> UnboundTransformFromJson(
    const nlohmann::json& json) {
  if (!IsTransformTerm(json)) {
    return JsonParseError("Invalid unbound transform json: {}", SafeDumpJson(json));
  }

  ICEBERG_ASSIGN_OR_RAISE(auto transform_str,
                          GetJsonValue<std::string>(json, kTransform));
  ICEBERG_ASSIGN_OR_RAISE(auto transform, TransformFromString(transform_str));
  ICEBERG_ASSIGN_OR_RAISE(auto ref, NamedReferenceFromJson(json[kTerm]));
  return UnboundTransform::Make(std::move(ref), std::move(transform));
}
204+
205+
/// Serialize a literal to a JSON value according to its type id.
///
/// - Null literals become JSON null.
/// - Boolean, int, long, float and double become native JSON scalars.
/// - Date, time, timestamp and timestamptz are rendered through the
///   corresponding TransformUtil::Human* helper.
/// - String and UUID become JSON strings; decimal uses Literal::ToString().
/// - Binary and fixed become an uppercase hexadecimal string, two digits per
///   byte.
/// - Any other type id yields a default-constructed (null) JSON value.
///
/// NOTE(review): the std::get calls assume the stored alternative matches the
/// type id; confirm that non-null literals reaching this function always hold
/// a concrete value.
nlohmann::json ToJson(const Literal& literal) {
  if (literal.IsNull()) {
    return nullptr;
  }

  const auto type_id = literal.type()->type_id();
  const auto& value = literal.value();

  switch (type_id) {
    case TypeId::kBoolean:
      return std::get<bool>(value);
    case TypeId::kInt:
      return std::get<int32_t>(value);
    case TypeId::kDate:
      return TransformUtil::HumanDay(std::get<int32_t>(value));
    case TypeId::kLong:
      return std::get<int64_t>(value);
    case TypeId::kTime:
      return TransformUtil::HumanTime(std::get<int64_t>(value));
    case TypeId::kTimestamp:
      return TransformUtil::HumanTimestamp(std::get<int64_t>(value));
    case TypeId::kTimestampTz:
      return TransformUtil::HumanTimestampWithZone(std::get<int64_t>(value));
    case TypeId::kFloat:
      return std::get<float>(value);
    case TypeId::kDouble:
      return std::get<double>(value);
    case TypeId::kString:
      return std::get<std::string>(value);
    case TypeId::kBinary:
    case TypeId::kFixed: {
      // Table-driven hex encoding: same uppercase, zero-padded output as
      // formatting each byte with "{:02X}", without a temporary string per
      // byte and without depending on <format>.
      static constexpr char kHexDigits[] = "0123456789ABCDEF";
      const auto& bytes = std::get<std::vector<uint8_t>>(value);
      std::string hex;
      hex.reserve(bytes.size() * 2);
      for (const uint8_t byte : bytes) {
        hex.push_back(kHexDigits[byte >> 4]);
        hex.push_back(kHexDigits[byte & 0x0F]);
      }
      return hex;
    }
    case TypeId::kDecimal:
      return literal.ToString();
    case TypeId::kUuid:
      return std::get<Uuid>(value).ToString();
    default:
      // Unsupported type ids serialize as JSON null.
      return nlohmann::json{};
  }
}
254+
255+
/// Parse a literal from a JSON scalar without schema information.
///
/// Mapping by JSON type:
/// - null            -> Literal::Null(nullptr)
/// - boolean         -> Literal::Boolean
/// - integer         -> Literal::Long
/// - floating point  -> Literal::Double
/// - string          -> Literal::String
///
/// Unsigned integers that do not fit in a signed 64-bit value are rejected
/// with a parse error instead of being wrapped into a negative long. Arrays,
/// objects and other JSON types are rejected as unsupported.
Result<Literal> LiteralFromJson(const nlohmann::json& json) {
  if (json.is_null()) {
    return Literal::Null(nullptr);
  }
  if (json.is_boolean()) {
    return Literal::Boolean(json.get<bool>());
  }
  if (json.is_number_unsigned()) {
    // is_number_integer() is also true for unsigned values, and
    // get<int64_t>() would silently wrap anything above INT64_MAX.
    const auto unsigned_value = json.get<uint64_t>();
    if (unsigned_value > static_cast<uint64_t>(INT64_MAX)) [[unlikely]] {
      return JsonParseError("Integer literal out of range for long: {}",
                            SafeDumpJson(json));
    }
    return Literal::Long(static_cast<int64_t>(unsigned_value));
  }
  if (json.is_number_integer()) {
    return Literal::Long(json.get<int64_t>());
  }
  if (json.is_number_float()) {
    return Literal::Double(json.get<double>());
  }
  if (json.is_string()) {
    // All strings are returned as String literals.
    // Conversion to binary/date/time/etc. happens during binding
    // when schema type information is available.
    return Literal::String(json.get<std::string>());
  }
  return JsonParseError("Unsupported literal JSON type");
}
276+
277+
/// Serialize a term by dispatching on its kind: reference terms are written
/// via ToJson(const NamedReference&), transform terms via
/// ToJson(const UnboundTransform&). Any other kind produces JSON null.
nlohmann::json TermToJson(const Term& term) {
  const auto kind = term.kind();
  if (kind == Term::Kind::kReference) {
    return ToJson(static_cast<const NamedReference&>(term));
  }
  if (kind == Term::Kind::kTransform) {
    return ToJson(static_cast<const UnboundTransform&>(term));
  }
  return nullptr;
}
287+
288+
/// Serialize an unbound predicate as a JSON object.
///
/// "type" always holds the serialized operation. "term" is written when the
/// predicate is an UnboundPredicateImpl over BoundReference or BoundTransform;
/// for any other concrete type it is left out. Set operations add a "values"
/// array (possibly empty); other non-unary operations add "value" from the
/// first literal when one exists. Unary operations add nothing further.
nlohmann::json ToJson(const UnboundPredicate& pred) {
  const auto op = pred.op();

  nlohmann::json result;
  result[kType] = ToJson(op);

  // The term and literals live on the concrete impl type, so probe for each
  // supported instantiation in turn.
  std::span<const Literal> operands;
  if (const auto* by_reference =
          dynamic_cast<const UnboundPredicateImpl<BoundReference>*>(&pred)) {
    result[kTerm] = TermToJson(*by_reference->term());
    operands = by_reference->literals();
  } else if (const auto* by_transform =
                 dynamic_cast<const UnboundPredicateImpl<BoundTransform>*>(&pred)) {
    result[kTerm] = TermToJson(*by_transform->term());
    operands = by_transform->literals();
  }

  if (IsUnaryOperation(op)) {
    return result;
  }

  if (IsSetOperation(op)) {
    nlohmann::json values = nlohmann::json::array();
    for (const auto& operand : operands) {
      values.push_back(ToJson(operand));
    }
    result[kValues] = std::move(values);
    return result;
  }

  if (!operands.empty()) {
    result[kValue] = ToJson(operands[0]);
  }
  return result;
}
317+
318+
/// Parse an unbound predicate from its JSON object form.
///
/// The node must be an object with a "type" field (the operation) and a
/// "term" field. The term is parsed as a transform term when it satisfies
/// IsTransformTerm and as a named reference otherwise. Operand fields are
/// then read by MakePredicateFromJson.
///
/// Returns a parse error when the node is not an object or when "type" or
/// "term" is missing. The shape is validated up front because indexing a
/// const nlohmann::json with an absent key is not a checked operation.
Result<std::unique_ptr<UnboundPredicate>> UnboundPredicateFromJson(
    const nlohmann::json& json) {
  if (!json.is_object()) [[unlikely]] {
    return JsonParseError("Predicate must be a JSON object: {}", SafeDumpJson(json));
  }
  if (!json.contains(kType)) [[unlikely]] {
    return JsonParseError("Missing 'type' field for predicate: {}", SafeDumpJson(json));
  }
  if (!json.contains(kTerm)) [[unlikely]] {
    return JsonParseError("Missing 'term' field for predicate: {}", SafeDumpJson(json));
  }

  ICEBERG_ASSIGN_OR_RAISE(auto op, OperationTypeFromJson(json[kType]));

  const auto& term_json = json[kTerm];

  if (IsTransformTerm(term_json)) {
    ICEBERG_ASSIGN_OR_RAISE(auto term, UnboundTransformFromJson(term_json));
    return MakePredicateFromJson<BoundTransform>(op, std::move(term), json);
  }

  ICEBERG_ASSIGN_OR_RAISE(auto term, NamedReferenceFromJson(term_json));
  return MakePredicateFromJson<BoundReference>(op, std::move(term), json);
}
332+
126333
/// Parse an expression tree from JSON.
///
/// Accepted inputs:
/// - a JSON boolean, mapped to the True / False singleton instances;
/// - a JSON object with a "type" field naming the operation. AND and OR
///   require "left" and "right" sub-expressions, NOT requires "child"; every
///   other operation is parsed as an unbound predicate.
///
/// Returns a parse error when the node is neither boolean nor object, when
/// "type" is missing, or when a required child field is absent. Errors from
/// nested parsing and from the And/Or/Not factories are propagated.
Result<std::shared_ptr<Expression>> ExpressionFromJson(const nlohmann::json& json) {
  // Handle boolean constants
  if (json.is_boolean()) {
    return json.get<bool>()
               ? internal::checked_pointer_cast<Expression>(True::Instance())
               : internal::checked_pointer_cast<Expression>(False::Instance());
  }

  if (!json.is_object()) [[unlikely]] {
    return JsonParseError("Expression must be boolean or object");
  }

  // Indexing a const json with an absent key is unchecked, so verify first.
  if (!json.contains(kType)) [[unlikely]] {
    return JsonParseError("Expression object missing 'type' field: {}",
                          SafeDumpJson(json));
  }

  ICEBERG_ASSIGN_OR_RAISE(auto op, OperationTypeFromJson(json[kType]));

  switch (op) {
    case Expression::Operation::kAnd: {
      if (!json.contains(kLeft) || !json.contains(kRight)) [[unlikely]] {
        return JsonParseError("AND expression missing 'left' or 'right' field");
      }
      ICEBERG_ASSIGN_OR_RAISE(auto left, ExpressionFromJson(json[kLeft]));
      ICEBERG_ASSIGN_OR_RAISE(auto right, ExpressionFromJson(json[kRight]));
      ICEBERG_ASSIGN_OR_RAISE(auto result, And::Make(std::move(left), std::move(right)));
      return std::shared_ptr<Expression>(std::move(result));
    }
    case Expression::Operation::kOr: {
      if (!json.contains(kLeft) || !json.contains(kRight)) [[unlikely]] {
        return JsonParseError("OR expression missing 'left' or 'right' field");
      }
      ICEBERG_ASSIGN_OR_RAISE(auto left, ExpressionFromJson(json[kLeft]));
      ICEBERG_ASSIGN_OR_RAISE(auto right, ExpressionFromJson(json[kRight]));
      ICEBERG_ASSIGN_OR_RAISE(auto result, Or::Make(std::move(left), std::move(right)));
      return std::shared_ptr<Expression>(std::move(result));
    }
    case Expression::Operation::kNot: {
      if (!json.contains(kChild)) [[unlikely]] {
        return JsonParseError("NOT expression missing 'child' field");
      }
      ICEBERG_ASSIGN_OR_RAISE(auto child, ExpressionFromJson(json[kChild]));
      ICEBERG_ASSIGN_OR_RAISE(auto result, Not::Make(std::move(child)));
      return std::shared_ptr<Expression>(std::move(result));
    }
    default:
      // All other operations are predicates
      return UnboundPredicateFromJson(json);
  }
}
135379

136380
/// Serialize an expression tree to JSON.
///
/// True and False become JSON booleans. AND and OR become objects with
/// "type", "left" and "right"; NOT becomes an object with "type" and "child".
/// Every other operation is expected to be an UnboundPredicate and is
/// delegated to ToJson(const UnboundPredicate&).
///
/// An expression with a predicate operation that is not an UnboundPredicate
/// fails the ICEBERG_CHECK_OR_DIE check instead of escaping as std::bad_cast
/// from a reference dynamic_cast.
nlohmann::json ToJson(const Expression& expr) {
  switch (expr.op()) {
    case Expression::Operation::kTrue:
      return true;
    case Expression::Operation::kFalse:
      return false;
    case Expression::Operation::kAnd: {
      const auto& and_expr = static_cast<const And&>(expr);
      nlohmann::json json;
      json[kType] = ToJson(expr.op());
      json[kLeft] = ToJson(*and_expr.left());
      json[kRight] = ToJson(*and_expr.right());
      return json;
    }
    case Expression::Operation::kOr: {
      const auto& or_expr = static_cast<const Or&>(expr);
      nlohmann::json json;
      json[kType] = ToJson(expr.op());
      json[kLeft] = ToJson(*or_expr.left());
      json[kRight] = ToJson(*or_expr.right());
      return json;
    }
    case Expression::Operation::kNot: {
      const auto& not_expr = static_cast<const Not&>(expr);
      nlohmann::json json;
      json[kType] = ToJson(expr.op());
      json[kChild] = ToJson(*not_expr.child());
      return json;
    }
    default: {
      // Pointer cast so that an unsupported expression type is reported with
      // an explicit message rather than via an exception.
      const auto* predicate = dynamic_cast<const UnboundPredicate*>(&expr);
      ICEBERG_CHECK_OR_DIE(predicate != nullptr,
                           "Only unbound predicates can be serialized to JSON.");
      return ToJson(*predicate);
    }
  }
}
149413

0 commit comments

Comments
 (0)