Skip to content

Commit 50f5c09

Browse files
fix: Fix sample op volatility in absence of total order
1 parent 7e959b9 commit 50f5c09

File tree

20 files changed

+334
-68
lines changed

20 files changed

+334
-68
lines changed

bigframes/core/array_value.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -540,13 +540,22 @@ def explode(self, column_ids: typing.Sequence[str]) -> ArrayValue:
540540
offsets = tuple(ex.deref(id) for id in column_ids)
541541
return ArrayValue(nodes.ExplodeNode(child=self.node, column_ids=offsets))
542542

543-
def _uniform_sampling(self, fraction: float) -> ArrayValue:
543+
def _uniform_sampling(
544+
self, fraction: float, shuffle: bool, seed: Optional[int] = None
545+
) -> ArrayValue:
544546
"""Sampling the table on given fraction.
545547
546548
.. warning::
547549
The row numbers of the result are non-deterministic; avoid relying on them.
548550
"""
549-
return ArrayValue(nodes.RandomSampleNode(self.node, fraction))
551+
return ArrayValue(
552+
nodes.RandomSampleNode(self.node, fraction, shuffle=shuffle, seed=seed)
553+
)
554+
555+
def _shuffle(self, seed: Optional[int] = None):
556+
return ArrayValue(
557+
nodes.RandomSampleNode(self.node, fraction=1.0, shuffle=True, seed=seed)
558+
)
550559

551560
# Deterministically generate namespaced ids for new variables
552561
# These new ids are only unique within the current namespace.

bigframes/core/blocks.py

Lines changed: 34 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -833,35 +833,46 @@ def _materialize_local(
833833
return df, execute_result.query_job
834834

835835
def _downsample(
836-
self, total_rows: int, sampling_method: str, fraction: float, random_state
836+
self,
837+
total_rows: int,
838+
sampling_method: str,
839+
fraction: float,
840+
random_state: Optional[int],
837841
) -> Block:
838842
# either selecting fraction or number of rows
839843
if sampling_method == _HEAD:
840844
filtered_block = self.slice(stop=int(total_rows * fraction))
841845
return filtered_block
842846
elif (sampling_method == _UNIFORM) and (random_state is None):
843-
filtered_expr = self.expr._uniform_sampling(fraction)
844-
block = Block(
845-
filtered_expr,
846-
index_columns=self.index_columns,
847-
column_labels=self.column_labels,
848-
index_labels=self.index.names,
849-
)
850-
return block
847+
return self.sample(fraction=fraction, shuffle=False, seed=random_state)
851848
elif sampling_method == _UNIFORM:
852-
block = self.split(
853-
fracs=(fraction,),
854-
random_state=random_state,
855-
sort=False,
856-
)[0]
857-
return block
849+
return self.sample(fraction=fraction, shuffle=False)
858850
else:
859851
# This part should never be called, just in case.
860852
raise NotImplementedError(
861853
f"The downsampling method {sampling_method} is not implemented, "
862854
f"please choose from {','.join(_SAMPLING_METHODS)}."
863855
)
864856

857+
def sample(
858+
self, fraction: float, shuffle: bool, seed: Optional[int] = None
859+
) -> Block:
860+
assert fraction <= 1.0 and fraction >= 0
861+
return Block(
862+
self.expr._uniform_sampling(fraction=fraction, shuffle=shuffle, seed=seed),
863+
index_columns=self.index_columns,
864+
column_labels=self.column_labels,
865+
index_labels=self.index.names,
866+
)
867+
868+
def shuffle(self, seed: Optional[int] = None) -> Block:
869+
return Block(
870+
self.expr._uniform_sampling(fraction=1.0, shuffle=True, seed=seed),
871+
index_columns=self.index_columns,
872+
column_labels=self.column_labels,
873+
index_labels=self.index.names,
874+
)
875+
865876
def split(
866877
self,
867878
ns: Iterable[int] = (),
@@ -894,22 +905,11 @@ def split(
894905
random_state = random.randint(-(2**63), 2**63 - 1)
895906

896907
# Create a new column with random_state value.
897-
block, random_state_col = block.create_constant(str(random_state))
908+
og_ordering_col = None
909+
if sort is False:
910+
block, og_ordering_col = block.promote_offsets()
898911

899-
# Create an ordering col and convert to string
900-
block, ordering_col = block.promote_offsets()
901-
block, string_ordering_col = block.apply_unary_op(
902-
ordering_col, ops.AsTypeOp(to_type=bigframes.dtypes.STRING_DTYPE)
903-
)
904-
905-
# Apply hash method to sum col and order by it.
906-
block, string_sum_col = block.apply_binary_op(
907-
string_ordering_col, random_state_col, ops.strconcat_op
908-
)
909-
block, hash_string_sum_col = block.apply_unary_op(string_sum_col, ops.hash_op)
910-
block = block.order_by(
911-
[ordering.OrderingExpression(ex.deref(hash_string_sum_col))]
912-
)
912+
block = block.shuffle(seed=random_state)
913913

914914
intervals = []
915915
cur = 0
@@ -934,21 +934,15 @@ def split(
934934
for sliced_block in sliced_blocks
935935
]
936936
elif sort is False:
937+
assert og_ordering_col is not None
937938
sliced_blocks = [
938939
sliced_block.order_by(
939-
[ordering.OrderingExpression(ex.deref(ordering_col))]
940-
)
940+
[ordering.OrderingExpression(ex.deref(og_ordering_col))]
941+
).drop_columns([og_ordering_col])
941942
for sliced_block in sliced_blocks
942943
]
943944

944-
drop_cols = [
945-
random_state_col,
946-
ordering_col,
947-
string_ordering_col,
948-
string_sum_col,
949-
hash_string_sum_col,
950-
]
951-
return [sliced_block.drop_columns(drop_cols) for sliced_block in sliced_blocks]
945+
return [sliced_block for sliced_block in sliced_blocks]
952946

953947
def _compute_dry_run(
954948
self,

bigframes/core/compile/ibis_compiler/default_ordering.py

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -57,12 +57,11 @@ def _convert_to_nonnull_string(column: ibis_types.Value) -> ibis_types.StringVal
5757
)
5858

5959

60-
def gen_row_key(
60+
def gen_row_hash(
6161
columns: Sequence[ibis_types.Value],
6262
) -> bigframes_vendored.ibis.Value:
6363
ordering_hash_part = guid.generate_guid("bigframes_ordering_")
6464
ordering_hash_part2 = guid.generate_guid("bigframes_ordering_")
65-
ordering_rand_part = guid.generate_guid("bigframes_ordering_")
6665

6766
# All inputs into hash must be non-null or resulting hash will be null
6867
str_values = list(map(_convert_to_nonnull_string, columns))
@@ -81,11 +80,4 @@ def gen_row_key(
8180
.name(ordering_hash_part2)
8281
.cast(ibis_dtypes.String(nullable=True))
8382
)
84-
# Used to disambiguate between identical rows (which will have identical hash)
85-
random_value = (
86-
bigframes_vendored.ibis.random()
87-
.name(ordering_rand_part)
88-
.cast(ibis_dtypes.String(nullable=True))
89-
)
90-
91-
return full_row_hash.concat(full_row_hash_p2, random_value)
83+
return full_row_hash.concat(full_row_hash_p2)

bigframes/core/compile/ibis_compiler/ibis_compiler.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@ def compile_sql(request: configs.CompileRequest) -> configs.CompileResult:
7777

7878
def _replace_unsupported_ops(node: nodes.BigFrameNode):
7979
# TODO: Run all replacement rules as single bottom-up pass
80+
node = nodes.bottom_up(node, rewrites.rewrite_random_sample)
8081
node = nodes.bottom_up(node, rewrites.rewrite_slice)
8182
node = nodes.bottom_up(node, rewrites.rewrite_timedelta_expressions)
8283
node = nodes.bottom_up(node, rewrites.rewrite_range_rolling)

bigframes/core/compile/ibis_compiler/scalar_op_compiler.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,35 @@ def compile_row_op(
100100
impl = self._registry[op.name]
101101
return impl(inputs, op)
102102

103+
def register_nullary_op(
104+
self,
105+
op_ref: typing.Union[ops.NullaryOp, type[ops.NullaryOp]],
106+
pass_op: bool = False,
107+
):
108+
"""
109+
Decorator to register a nullary op implementation.
110+
111+
Args:
112+
op_ref (NullaryOp or NullaryOp type):
113+
Class or instance of operator that is implemented by the decorated function.
114+
pass_op (bool):
115+
Set to true if implementation takes the operator object as the last argument.
116+
This is needed for parameterized ops where parameters are part of op object.
117+
"""
118+
key = typing.cast(str, op_ref.name)
119+
120+
def decorator(impl: typing.Callable[..., ibis_types.Value]):
121+
def normalized_impl(args: typing.Sequence[ibis_types.Value], op: ops.RowOp):
122+
if pass_op:
123+
return impl(op)
124+
else:
125+
return impl()
126+
127+
self._register(key, normalized_impl)
128+
return impl
129+
130+
return decorator
131+
103132
def register_unary_op(
104133
self,
105134
op_ref: typing.Union[ops.UnaryOp, type[ops.UnaryOp]],

bigframes/core/compile/ibis_compiler/scalar_op_registry.py

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1987,9 +1987,19 @@ def _construct_prompt(
19871987
return ibis.struct(prompt)
19881988

19891989

1990-
@scalar_op_compiler.register_nary_op(ops.RowKey, pass_op=True)
1991-
def rowkey_op_impl(*values: ibis_types.Value, op: ops.RowKey) -> ibis_types.Value:
1992-
return bigframes.core.compile.ibis_compiler.default_ordering.gen_row_key(values)
1990+
@scalar_op_compiler.register_nary_op(ops.RowHash, pass_op=True)
1991+
def rowkey_op_impl(*values: ibis_types.Value, op: ops.RowHash) -> ibis_types.Value:
1992+
return bigframes.core.compile.ibis_compiler.default_ordering.gen_row_hash(values)
1993+
1994+
1995+
@scalar_op_compiler.register_nullary_op(ops.rand_op, pass_op=False)
1996+
def rand_op_impl() -> ibis_types.Value:
1997+
return ibis.random()
1998+
1999+
2000+
@scalar_op_compiler.register_nullary_op(ops.gen_uuid_op, pass_op=False)
2001+
def gen_uuid_op_impl() -> ibis_types.Value:
2002+
return ibis.uuid()
19932003

19942004

19952005
# Helpers

bigframes/core/compile/sqlglot/compiler.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -386,6 +386,7 @@ def compile_window(node: nodes.WindowOpNode, child: ir.SQLGlotIR) -> ir.SQLGlotI
386386

387387

388388
def _replace_unsupported_ops(node: nodes.BigFrameNode):
389+
node = nodes.bottom_up(node, rewrite.rewrite_random_sample)
389390
node = nodes.bottom_up(node, rewrite.rewrite_slice)
390391
node = nodes.bottom_up(node, rewrite.rewrite_range_rolling)
391392
return node

bigframes/core/compile/sqlglot/expressions/generic_ops.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
from bigframes.core.compile.sqlglot.expressions.typed_expr import TypedExpr
2424
import bigframes.core.compile.sqlglot.scalar_compiler as scalar_compiler
2525

26+
register_nullary_op = scalar_compiler.scalar_op_compiler.register_nullary_op
2627
register_unary_op = scalar_compiler.scalar_op_compiler.register_unary_op
2728
register_binary_op = scalar_compiler.scalar_op_compiler.register_binary_op
2829
register_nary_op = scalar_compiler.scalar_op_compiler.register_nary_op
@@ -173,7 +174,7 @@ def _(left: TypedExpr, right: TypedExpr) -> sge.Expression:
173174
return sge.Coalesce(this=left.expr, expressions=[right.expr])
174175

175176

176-
@register_nary_op(ops.RowKey)
177+
@register_nary_op(ops.RowHash)
177178
def _(*values: TypedExpr) -> sge.Expression:
178179
# All inputs into hash must be non-null or resulting hash will be null
179180
str_values = [_convert_to_nonnull_string_sqlglot(value) for value in values]
@@ -197,6 +198,16 @@ def _(*values: TypedExpr) -> sge.Expression:
197198
)
198199

199200

201+
@register_nullary_op(ops.rand_op)
202+
def _() -> sge.Expression:
203+
return sge.func("RAND")
204+
205+
206+
@register_nullary_op(ops.gen_uuid_op)
207+
def _() -> sge.Expression:
208+
return sge.func("GENERATE_UUID")
209+
210+
200211
# Helper functions
201212
def _cast_to_json(expr: TypedExpr, op: ops.AsTypeOp) -> sge.Expression:
202213
from_type = expr.dtype

bigframes/core/compile/sqlglot/scalar_compiler.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,35 @@ def compile_row_op(
9393
impl = self._registry[op.name]
9494
return impl(inputs, op)
9595

96+
def register_nullary_op(
97+
self,
98+
op_ref: typing.Union[ops.NullaryOp, type[ops.NullaryOp]],
99+
pass_op: bool = False,
100+
):
101+
"""
102+
Decorator to register a nullary op implementation.
103+
104+
Args:
105+
op_ref (NullaryOp or NullaryOp type):
106+
Class or instance of operator that is implemented by the decorated function.
107+
pass_op (bool):
108+
Set to true if implementation takes the operator object as the last argument.
109+
This is needed for parameterized ops where parameters are part of op object.
110+
"""
111+
key = typing.cast(str, op_ref.name)
112+
113+
def decorator(impl: typing.Callable[..., sge.Expression]):
114+
def normalized_impl(args: typing.Sequence[TypedExpr], op: ops.RowOp):
115+
if pass_op:
116+
return impl(op)
117+
else:
118+
return impl()
119+
120+
self._register(key, normalized_impl)
121+
return impl
122+
123+
return decorator
124+
96125
def register_unary_op(
97126
self,
98127
op_ref: typing.Union[ops.UnaryOp, type[ops.UnaryOp]],

bigframes/core/nodes.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1531,10 +1531,12 @@ def remap_refs(
15311531
@dataclasses.dataclass(frozen=True, eq=False)
15321532
class RandomSampleNode(UnaryNode):
15331533
fraction: float
1534+
shuffle: bool
1535+
seed: Optional[int] = None
15341536

15351537
@property
15361538
def deterministic(self) -> bool:
1537-
return False
1539+
return self.seed is not None
15381540

15391541
@property
15401542
def row_preserving(self) -> bool:

0 commit comments

Comments
 (0)