1717 * under the License.
1818 */
1919
20- #include < format>
2120#include < ranges>
2221#include < string>
23- #include < utility>
2422#include < vector>
2523
2624#include < nlohmann/json.hpp>
2725
2826#include " iceberg/expression/json_serde_internal.h"
2927#include " iceberg/expression/literal.h"
28+ #include " iceberg/expression/predicate.h"
29+ #include " iceberg/expression/term.h"
30+ #include " iceberg/transform.h"
3031#include " iceberg/util/checked_cast.h"
3132#include " iceberg/util/json_util_internal.h"
3233#include " iceberg/util/macros.h"
34+ #include " iceberg/util/transform_util.h"
3335
3436namespace iceberg {
3537namespace {
38+ // JSON field names
39+ constexpr std::string_view kType = " type" ;
40+ constexpr std::string_view kTerm = " term" ;
41+ constexpr std::string_view kTransform = " transform" ;
42+ constexpr std::string_view kValue = " value" ;
43+ constexpr std::string_view kValues = " values" ;
44+ constexpr std::string_view kLeft = " left" ;
45+ constexpr std::string_view kRight = " right" ;
46+ constexpr std::string_view kChild = " child" ;
3647// Expression type strings
3748constexpr std::string_view kTypeTrue = " true" ;
3849constexpr std::string_view kTypeFalse = " false" ;
@@ -58,6 +69,43 @@ constexpr std::string_view kTypeCountNull = "count-null";
5869constexpr std::string_view kTypeCountStar = " count-star" ;
5970constexpr std::string_view kTypeMin = " min" ;
6071constexpr std::string_view kTypeMax = " max" ;
72+
73+ // / Helper to check if a JSON term represents a transform
74+ bool IsTransformTerm (const nlohmann::json& json) {
75+ return json.is_object () && json.contains (kType ) &&
76+ json[kType ].get <std::string>() == kTransform && json.contains (kTerm );
77+ }
78+
79+ // / Template helper to create predicates from JSON with the appropriate term type
80+ template <typename B>
81+ Result<std::unique_ptr<UnboundPredicate>> MakePredicateFromJson (
82+ Expression::Operation op, std::shared_ptr<UnboundTerm<B>> term,
83+ const nlohmann::json& json) {
84+ if (IsUnaryOperation (op)) {
85+ return UnboundPredicateImpl<B>::Make (op, std::move (term));
86+ }
87+
88+ if (IsSetOperation (op)) {
89+ std::vector<Literal> literals;
90+ if (!json.contains (kValues ) || !json[kValues ].is_array ()) [[unlikely]] {
91+ return JsonParseError (" Missing or invalid 'values' field for set operation: {}" ,
92+ SafeDumpJson (json));
93+ }
94+ for (const auto & val : json[kValues ]) {
95+ ICEBERG_ASSIGN_OR_RAISE (auto lit, LiteralFromJson (val));
96+ literals.push_back (std::move (lit));
97+ }
98+ return UnboundPredicateImpl<B>::Make (op, std::move (term), std::move (literals));
99+ }
100+
101+ // Literal predicate
102+ if (!json.contains (kValue )) [[unlikely]] {
103+ return JsonParseError (" Missing 'value' field for literal predicate: {}" ,
104+ SafeDumpJson (json));
105+ }
106+ ICEBERG_ASSIGN_OR_RAISE (auto literal, LiteralFromJson (json[kValue ]));
107+ return UnboundPredicateImpl<B>::Make (op, std::move (term), std::move (literal));
108+ }
61109} // namespace
62110
63111bool IsUnaryOperation (Expression::Operation op) {
@@ -83,7 +131,7 @@ bool IsSetOperation(Expression::Operation op) {
83131}
84132
85133Result<Expression::Operation> OperationTypeFromJson (const nlohmann::json& json) {
86- if (!json.is_string ()) {
134+ if (!json.is_string ()) [[unlikely]] {
87135 return JsonParseError (" Unable to create operation. Json value is not a string" );
88136 }
89137 auto typeStr = json.get <std::string>();
@@ -123,27 +171,243 @@ nlohmann::json ToJson(Expression::Operation op) {
123171 return json;
124172}
125173
174+ nlohmann::json ToJson (const NamedReference& ref) { return ref.name (); }
175+
176+ Result<std::unique_ptr<NamedReference>> NamedReferenceFromJson (
177+ const nlohmann::json& json) {
178+ if (!json.is_string ()) [[unlikely]] {
179+ return JsonParseError (" Expected string for named reference" );
180+ }
181+ return NamedReference::Make (json.get <std::string>());
182+ }
183+
184+ nlohmann::json ToJson (const UnboundTransform& transform) {
185+ auto & mutable_transform = const_cast <UnboundTransform&>(transform);
186+ nlohmann::json json;
187+ json[kType ] = kTransform ;
188+ json[kTransform ] = transform.transform ()->ToString ();
189+ json[kTerm ] = mutable_transform.reference ()->name ();
190+ return json;
191+ }
192+
193+ Result<std::unique_ptr<UnboundTransform>> UnboundTransformFromJson (
194+ const nlohmann::json& json) {
195+ if (IsTransformTerm (json)) {
196+ ICEBERG_ASSIGN_OR_RAISE (auto transform_str,
197+ GetJsonValue<std::string>(json, kTransform ));
198+ ICEBERG_ASSIGN_OR_RAISE (auto transform, TransformFromString (transform_str));
199+ ICEBERG_ASSIGN_OR_RAISE (auto ref, NamedReferenceFromJson (json[kTerm ]));
200+ return UnboundTransform::Make (std::move (ref), std::move (transform));
201+ }
202+ return JsonParseError (" Invalid unbound transform json: {}" , SafeDumpJson (json));
203+ }
204+
205+ nlohmann::json ToJson (const Literal& literal) {
206+ if (literal.IsNull ()) {
207+ return nullptr ;
208+ }
209+
210+ const auto type_id = literal.type ()->type_id ();
211+ const auto & value = literal.value ();
212+
213+ switch (type_id) {
214+ case TypeId::kBoolean :
215+ return std::get<bool >(value);
216+ case TypeId::kInt :
217+ return std::get<int32_t >(value);
218+ case TypeId::kDate :
219+ return TransformUtil::HumanDay (std::get<int32_t >(value));
220+ case TypeId::kLong :
221+ return std::get<int64_t >(value);
222+ case TypeId::kTime :
223+ return TransformUtil::HumanTime (std::get<int64_t >(value));
224+ case TypeId::kTimestamp :
225+ return TransformUtil::HumanTimestamp (std::get<int64_t >(value));
226+ case TypeId::kTimestampTz :
227+ return TransformUtil::HumanTimestampWithZone (std::get<int64_t >(value));
228+ case TypeId::kFloat :
229+ return std::get<float >(value);
230+ case TypeId::kDouble :
231+ return std::get<double >(value);
232+ case TypeId::kString :
233+ return std::get<std::string>(value);
234+ case TypeId::kBinary :
235+ case TypeId::kFixed : {
236+ const auto & bytes = std::get<std::vector<uint8_t >>(value);
237+ std::string hex;
238+ hex.reserve (bytes.size () * 2 );
239+ for (uint8_t byte : bytes) {
240+ hex += std::format (" {:02X}" , byte);
241+ }
242+ return hex;
243+ }
244+ case TypeId::kDecimal : {
245+ return literal.ToString ();
246+ }
247+ case TypeId::kUuid :
248+ return std::get<Uuid>(value).ToString ();
249+ default :
250+ nlohmann::json json;
251+ return json;
252+ }
253+ }
254+
255+ Result<Literal> LiteralFromJson (const nlohmann::json& json) {
256+ if (json.is_null ()) {
257+ return Literal::Null (nullptr );
258+ }
259+ if (json.is_boolean ()) {
260+ return Literal::Boolean (json.get <bool >());
261+ }
262+ if (json.is_number_integer ()) {
263+ return Literal::Long (json.get <int64_t >());
264+ }
265+ if (json.is_number_float ()) {
266+ return Literal::Double (json.get <double >());
267+ }
268+ if (json.is_string ()) {
269+ // All strings are returned as String literals.
270+ // Conversion to binary/date/time/etc. happens during binding
271+ // when schema type information is available.
272+ return Literal::String (json.get <std::string>());
273+ }
274+ return JsonParseError (" Unsupported literal JSON type" );
275+ }
276+
277+ nlohmann::json TermToJson (const Term& term) {
278+ switch (term.kind ()) {
279+ case Term::Kind::kReference :
280+ return ToJson (static_cast <const NamedReference&>(term));
281+ case Term::Kind::kTransform :
282+ return ToJson (static_cast <const UnboundTransform&>(term));
283+ default :
284+ return nullptr ;
285+ }
286+ }
287+
288+ nlohmann::json ToJson (const UnboundPredicate& pred) {
289+ nlohmann::json json;
290+ json[kType ] = ToJson (pred.op ());
291+
292+ // Get term and literals by casting to the appropriate impl type
293+ std::span<const Literal> literals;
294+
295+ if (auto * ref_pred = dynamic_cast <const UnboundPredicateImpl<BoundReference>*>(&pred)) {
296+ json[kTerm ] = TermToJson (*ref_pred->term ());
297+ literals = ref_pred->literals ();
298+ } else if (auto * transform_pred =
299+ dynamic_cast <const UnboundPredicateImpl<BoundTransform>*>(&pred)) {
300+ json[kTerm ] = TermToJson (*transform_pred->term ());
301+ literals = transform_pred->literals ();
302+ }
303+
304+ if (!IsUnaryOperation (pred.op ())) {
305+ if (IsSetOperation (pred.op ())) {
306+ nlohmann::json values = nlohmann::json::array ();
307+ for (const auto & lit : literals) {
308+ values.push_back (ToJson (lit));
309+ }
310+ json[kValues ] = std::move (values);
311+ } else if (!literals.empty ()) {
312+ json[kValue ] = ToJson (literals[0 ]);
313+ }
314+ }
315+ return json;
316+ }
317+
318+ Result<std::unique_ptr<UnboundPredicate>> UnboundPredicateFromJson (
319+ const nlohmann::json& json) {
320+ ICEBERG_ASSIGN_OR_RAISE (auto op, OperationTypeFromJson (json[kType ]));
321+
322+ const auto & term_json = json[kTerm ];
323+
324+ if (IsTransformTerm (term_json)) {
325+ ICEBERG_ASSIGN_OR_RAISE (auto term, UnboundTransformFromJson (term_json));
326+ return MakePredicateFromJson<BoundTransform>(op, std::move (term), json);
327+ }
328+
329+ ICEBERG_ASSIGN_OR_RAISE (auto term, NamedReferenceFromJson (term_json));
330+ return MakePredicateFromJson<BoundReference>(op, std::move (term), json);
331+ }
332+
126333Result<std::shared_ptr<Expression>> ExpressionFromJson (const nlohmann::json& json) {
127- // Handle boolean
334+ // Handle boolean constants
128335 if (json.is_boolean ()) {
129336 return json.get <bool >()
130337 ? internal::checked_pointer_cast<Expression>(True::Instance ())
131338 : internal::checked_pointer_cast<Expression>(False::Instance ());
132339 }
133- return JsonParseError (" Only booleans are currently supported." );
340+
341+ if (!json.is_object ()) [[unlikely]] {
342+ return JsonParseError (" Expression must be boolean or object" );
343+ }
344+
345+ ICEBERG_ASSIGN_OR_RAISE (auto op, OperationTypeFromJson (json[kType ]));
346+
347+ switch (op) {
348+ case Expression::Operation::kAnd : {
349+ if (!json.contains (kLeft ) || !json.contains (kRight )) [[unlikely]] {
350+ return JsonParseError (" AND expression missing 'left' or 'right' field" );
351+ }
352+ ICEBERG_ASSIGN_OR_RAISE (auto left, ExpressionFromJson (json[kLeft ]));
353+ ICEBERG_ASSIGN_OR_RAISE (auto right, ExpressionFromJson (json[kRight ]));
354+ ICEBERG_ASSIGN_OR_RAISE (auto result, And::Make (std::move (left), std::move (right)));
355+ return std::shared_ptr<Expression>(std::move (result));
356+ }
357+ case Expression::Operation::kOr : {
358+ if (!json.contains (kLeft ) || !json.contains (kRight )) [[unlikely]] {
359+ return JsonParseError (" OR expression missing 'left' or 'right' field" );
360+ }
361+ ICEBERG_ASSIGN_OR_RAISE (auto left, ExpressionFromJson (json[kLeft ]));
362+ ICEBERG_ASSIGN_OR_RAISE (auto right, ExpressionFromJson (json[kRight ]));
363+ ICEBERG_ASSIGN_OR_RAISE (auto result, Or::Make (std::move (left), std::move (right)));
364+ return std::shared_ptr<Expression>(std::move (result));
365+ }
366+ case Expression::Operation::kNot : {
367+ if (!json.contains (kChild )) [[unlikely]] {
368+ return JsonParseError (" NOT expression missing 'child' field" );
369+ }
370+ ICEBERG_ASSIGN_OR_RAISE (auto child, ExpressionFromJson (json[kChild ]));
371+ ICEBERG_ASSIGN_OR_RAISE (auto result, Not::Make (std::move (child)));
372+ return std::shared_ptr<Expression>(std::move (result));
373+ }
374+ default :
375+ // All other operations are predicates
376+ return UnboundPredicateFromJson (json);
377+ }
134378}
135379
136380nlohmann::json ToJson (const Expression& expr) {
137381 switch (expr.op ()) {
138382 case Expression::Operation::kTrue :
139383 return true ;
140-
141384 case Expression::Operation::kFalse :
142385 return false ;
386+ case Expression::Operation::kAnd : {
387+ const auto & and_expr = static_cast <const And&>(expr);
388+ nlohmann::json json;
389+ json[kType ] = ToJson (expr.op ());
390+ json[kLeft ] = ToJson (*and_expr.left ());
391+ json[kRight ] = ToJson (*and_expr.right ());
392+ return json;
393+ }
394+ case Expression::Operation::kOr : {
395+ const auto & or_expr = static_cast <const Or&>(expr);
396+ nlohmann::json json;
397+ json[kType ] = ToJson (expr.op ());
398+ json[kLeft ] = ToJson (*or_expr.left ());
399+ json[kRight ] = ToJson (*or_expr.right ());
400+ return json;
401+ }
402+ case Expression::Operation::kNot : {
403+ const auto & not_expr = static_cast <const Not&>(expr);
404+ nlohmann::json json;
405+ json[kType ] = ToJson (expr.op ());
406+ json[kChild ] = ToJson (*not_expr.child ());
407+ return json;
408+ }
143409 default :
144- // TODO(evindj): This code will be removed as we implemented the full expression
145- // serialization.
146- ICEBERG_CHECK_OR_DIE (false , " Only booleans are currently supported." );
410+ return ToJson (dynamic_cast <const UnboundPredicate&>(expr));
147411 }
148412}
149413
0 commit comments