Skip to content
This repository was archived by the owner on May 7, 2026. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion bigframes/core/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -634,7 +634,7 @@ def _materialize_local(
)
else:
total_rows = execute_result.total_rows
arrow = self.session._executor.execute(self.expr).to_arrow_table()
arrow = execute_result.to_arrow_table()
df = io_pandas.arrow_to_pandas(arrow, schema=self.expr.schema)
self._copy_index_to_pandas(df)

Expand Down
21 changes: 17 additions & 4 deletions tests/system/small/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -4821,20 +4821,33 @@ def test_to_gbq_table_labels(scalars_df_index):
pytest.param(["A", "C"], True, id="two_arrays_true"),
],
)
def test_dataframe_explode(col_names, ignore_index):
def test_dataframe_explode(col_names, ignore_index, session):
data = {
"A": [[0, 1, 2], [], [3, 4]],
"B": 3,
"C": [["a", "b", "c"], np.nan, ["d", "e"]],
}
df = bpd.DataFrame(data)

metrics = session._metrics
df = bpd.DataFrame(data, session=session)
pd_df = df.to_pandas()
pd_result = pd_df.explode(col_names, ignore_index=ignore_index)
bf_result = df.explode(col_names, ignore_index=ignore_index)

# Check that to_pandas() results in at most a single query execution
execs_pre = metrics.execution_count
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: put this line right below line 4831, or move line 4831 above this line, so that related statements stay together

bf_materialized = bf_result.to_pandas()
execs_post = metrics.execution_count

pd.testing.assert_frame_equal(
df.explode(col_names, ignore_index=ignore_index).to_pandas(),
pd_df.explode(col_names, ignore_index=ignore_index),
bf_materialized,
pd_result,
check_index_type=False,
check_dtype=False,
)
# we test this property on this method in particular as compilation
# is non-deterministic and won't use the query cache as implemented
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: I would also document that this is to check that the execution was performed only once

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Reorganized a bit to make clear that we only care about executions from the to_pandas() invocation.

assert execs_post - execs_pre <= 1


@pytest.mark.parametrize(
Expand Down