Skip to content
This repository was archived by the owner on May 7, 2026. It is now read-only.
Merged
4 changes: 4 additions & 0 deletions bigframes/core/compile/compiled.py
Original file line number Diff line number Diff line change
Expand Up @@ -441,6 +441,10 @@ def explode(self, offsets: typing.Sequence[int]) -> UnorderedIR:
columns=columns,
)

def as_ordered_ir(self) -> OrderedIR:
"""Convert to OrderedIr, but without any definite ordering."""
return OrderedIR(self._table, self._columns, predicates=self._predicates)

## Helpers
def _set_or_replace_by_id(
self, id: str, new_value: ibis_types.Value
Expand Down
29 changes: 21 additions & 8 deletions bigframes/core/compile/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,14 +76,27 @@ def _compile_node(
@_compile_node.register
def compile_join(self, node: nodes.JoinNode, ordered: bool = True):
if ordered:
left_ordered = self.compile_ordered_ir(node.left_child)
right_ordered = self.compile_ordered_ir(node.right_child)
return bigframes.core.compile.single_column.join_by_column_ordered(
left=left_ordered,
right=right_ordered,
type=node.type,
conditions=node.conditions,
)
# In general, joins are an ordering destroying operation.
# With ordering_mode = "partial", make this explicit. In
# this case, we don't need to provide a deterministic ordering.
if self.strict:
left_ordered = self.compile_ordered_ir(node.left_child)
right_ordered = self.compile_ordered_ir(node.right_child)
return bigframes.core.compile.single_column.join_by_column_ordered(
left=left_ordered,
right=right_ordered,
type=node.type,
conditions=node.conditions,
)
else:
left_unordered = self.compile_unordered_ir(node.left_child)
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm curious if wee need to do anything to handle sort=True behavior from the pandas join? I'm guessing "no", as I suspect this would be implemented as a sort after the join.

Also, let's add a comment explaining this optimization:

Suggested change
left_unordered = self.compile_unordered_ir(node.left_child)
# In general, joins are an ordering destroying operation.
# With ordering_mode = "partial", make this explicit. In
# this case, we don't need to provide a deterministic ordering.
left_unordered = self.compile_unordered_ir(node.left_child)

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

added comment

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And yes, sort=True sorts as an additional operation and so will still apply: https://github.com/googleapis/python-bigquery-dataframes/blob/main/bigframes/core/blocks.py#L2073-L2080

right_unordered = self.compile_unordered_ir(node.right_child)
return bigframes.core.compile.single_column.join_by_column_unordered(
left=left_unordered,
right=right_unordered,
type=node.type,
conditions=node.conditions,
).as_ordered_ir()
else:
left_unordered = self.compile_unordered_ir(node.left_child)
right_unordered = self.compile_unordered_ir(node.right_child)
Expand Down
4 changes: 3 additions & 1 deletion bigframes/core/nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,7 @@ def order_ambiguous(self) -> bool:

@property
def explicitly_ordered(self) -> bool:
# Do not consider user pre-join ordering intent - they need to re-order post-join in unordered mode.
return False

def __hash__(self):
Expand Down Expand Up @@ -307,7 +308,8 @@ def order_ambiguous(self) -> bool:

@property
def explicitly_ordered(self) -> bool:
return all(child.explicitly_ordered for child in self.children)
# Consider concat as an ordered operations (even though input frames may not be ordered)
return True

def __hash__(self):
return self._node_hash
Expand Down