It would be nice if the dispatched sql would be the optimised one in order to avoid a full table scan on partitioned tables.
Which gives an estimated processed bytes of 156.35 MB when pasted on the big query editor.
SELECT
`bfuid_col_3` AS `bfuid_col_3`,
`bfuid_col_12` AS `bfuid_col_12`,
`bfuid_col_5` AS `bfuid_col_5`
FROM
(SELECT
`t1`.`bfuid_col_3`,
`t1`.`bfuid_col_12`,
`t1`.`bfuid_col_5`,
`t1`.`bfuid_col_28` AS `bfuid_col_29`
FROM (
SELECT
ROW_NUMBER() OVER (
ORDER BY CONCAT(
CAST(FARM_FINGERPRINT(
CONCAT(
CONCAT('\\', REPLACE(COALESCE(`t0`.`hash`, ''), '\\', '\\\\')),
CONCAT('\\', REPLACE(COALESCE(CAST(`t0`.`size` AS STRING), ''), '\\', '\\\\')),
CONCAT('\\', REPLACE(COALESCE(CAST(`t0`.`virtual_size` AS STRING), ''), '\\', '\\\\')),
CONCAT('\\', REPLACE(COALESCE(CAST(`t0`.`version` AS STRING), ''), '\\', '\\\\')),
CONCAT('\\', REPLACE(COALESCE(CAST(`t0`.`lock_time` AS STRING), ''), '\\', '\\\\')),
CONCAT('\\', REPLACE(COALESCE(`t0`.`block_hash`, ''), '\\', '\\\\')),
CONCAT('\\', REPLACE(COALESCE(CAST(`t0`.`block_number` AS STRING), ''), '\\', '\\\\')),
CONCAT('\\', REPLACE(COALESCE(CAST(`t0`.`block_timestamp` AS STRING), ''), '\\', '\\\\')),
CONCAT(
'\\',
REPLACE(COALESCE(CAST(`t0`.`block_timestamp_month` AS STRING), ''), '\\', '\\\\')
),
CONCAT('\\', REPLACE(COALESCE(CAST(`t0`.`input_count` AS STRING), ''), '\\', '\\\\')),
CONCAT('\\', REPLACE(COALESCE(CAST(`t0`.`output_count` AS STRING), ''), '\\', '\\\\')),
CONCAT('\\', REPLACE(COALESCE(CAST(`t0`.`input_value` AS STRING), ''), '\\', '\\\\')),
CONCAT('\\', REPLACE(COALESCE(CAST(`t0`.`output_value` AS STRING), ''), '\\', '\\\\')),
CONCAT('\\', REPLACE(COALESCE(CAST(`t0`.`is_coinbase` AS STRING), ''), '\\', '\\\\')),
CONCAT('\\', REPLACE(COALESCE(CAST(`t0`.`fee` AS STRING), ''), '\\', '\\\\')),
CONCAT('\\', REPLACE(COALESCE(to_json_string(`t0`.`inputs`), ''), '\\', '\\\\')),
CONCAT('\\', REPLACE(COALESCE(to_json_string(`t0`.`outputs`), ''), '\\', '\\\\'))
)
) AS STRING),
CAST(FARM_FINGERPRINT(
CONCAT(
CONCAT(
CONCAT('\\', REPLACE(COALESCE(`t0`.`hash`, ''), '\\', '\\\\')),
CONCAT('\\', REPLACE(COALESCE(CAST(`t0`.`size` AS STRING), ''), '\\', '\\\\')),
CONCAT('\\', REPLACE(COALESCE(CAST(`t0`.`virtual_size` AS STRING), ''), '\\', '\\\\')),
CONCAT('\\', REPLACE(COALESCE(CAST(`t0`.`version` AS STRING), ''), '\\', '\\\\')),
CONCAT('\\', REPLACE(COALESCE(CAST(`t0`.`lock_time` AS STRING), ''), '\\', '\\\\')),
CONCAT('\\', REPLACE(COALESCE(`t0`.`block_hash`, ''), '\\', '\\\\')),
CONCAT('\\', REPLACE(COALESCE(CAST(`t0`.`block_number` AS STRING), ''), '\\', '\\\\')),
CONCAT('\\', REPLACE(COALESCE(CAST(`t0`.`block_timestamp` AS STRING), ''), '\\', '\\\\')),
CONCAT(
'\\',
REPLACE(COALESCE(CAST(`t0`.`block_timestamp_month` AS STRING), ''), '\\', '\\\\')
),
CONCAT('\\', REPLACE(COALESCE(CAST(`t0`.`input_count` AS STRING), ''), '\\', '\\\\')),
CONCAT('\\', REPLACE(COALESCE(CAST(`t0`.`output_count` AS STRING), ''), '\\', '\\\\')),
CONCAT('\\', REPLACE(COALESCE(CAST(`t0`.`input_value` AS STRING), ''), '\\', '\\\\')),
CONCAT('\\', REPLACE(COALESCE(CAST(`t0`.`output_value` AS STRING), ''), '\\', '\\\\')),
CONCAT('\\', REPLACE(COALESCE(CAST(`t0`.`is_coinbase` AS STRING), ''), '\\', '\\\\')),
CONCAT('\\', REPLACE(COALESCE(CAST(`t0`.`fee` AS STRING), ''), '\\', '\\\\')),
CONCAT('\\', REPLACE(COALESCE(to_json_string(`t0`.`inputs`), ''), '\\', '\\\\')),
CONCAT('\\', REPLACE(COALESCE(to_json_string(`t0`.`outputs`), ''), '\\', '\\\\'))
),
'_'
)
) AS STRING),
CAST(RAND() AS STRING)
) ASC
) - 1 AS `bfuid_col_3`,
`t0`.`size` AS `bfuid_col_5`,
`t0`.`block_timestamp_month` AS `bfuid_col_12`,
`t0`.`block_timestamp_month` = DATE(2025, 10, 1) AS `bfuid_col_21`,
ROW_NUMBER() OVER (
ORDER BY CONCAT(
CAST(FARM_FINGERPRINT(
CONCAT(
CONCAT('\\', REPLACE(COALESCE(`t0`.`hash`, ''), '\\', '\\\\')),
CONCAT('\\', REPLACE(COALESCE(CAST(`t0`.`size` AS STRING), ''), '\\', '\\\\')),
CONCAT('\\', REPLACE(COALESCE(CAST(`t0`.`virtual_size` AS STRING), ''), '\\', '\\\\')),
CONCAT('\\', REPLACE(COALESCE(CAST(`t0`.`version` AS STRING), ''), '\\', '\\\\')),
CONCAT('\\', REPLACE(COALESCE(CAST(`t0`.`lock_time` AS STRING), ''), '\\', '\\\\')),
CONCAT('\\', REPLACE(COALESCE(`t0`.`block_hash`, ''), '\\', '\\\\')),
CONCAT('\\', REPLACE(COALESCE(CAST(`t0`.`block_number` AS STRING), ''), '\\', '\\\\')),
CONCAT('\\', REPLACE(COALESCE(CAST(`t0`.`block_timestamp` AS STRING), ''), '\\', '\\\\')),
CONCAT(
'\\',
REPLACE(COALESCE(CAST(`t0`.`block_timestamp_month` AS STRING), ''), '\\', '\\\\')
),
CONCAT('\\', REPLACE(COALESCE(CAST(`t0`.`input_count` AS STRING), ''), '\\', '\\\\')),
CONCAT('\\', REPLACE(COALESCE(CAST(`t0`.`output_count` AS STRING), ''), '\\', '\\\\')),
CONCAT('\\', REPLACE(COALESCE(CAST(`t0`.`input_value` AS STRING), ''), '\\', '\\\\')),
CONCAT('\\', REPLACE(COALESCE(CAST(`t0`.`output_value` AS STRING), ''), '\\', '\\\\')),
CONCAT('\\', REPLACE(COALESCE(CAST(`t0`.`is_coinbase` AS STRING), ''), '\\', '\\\\')),
CONCAT('\\', REPLACE(COALESCE(CAST(`t0`.`fee` AS STRING), ''), '\\', '\\\\')),
CONCAT('\\', REPLACE(COALESCE(to_json_string(`t0`.`inputs`), ''), '\\', '\\\\')),
CONCAT('\\', REPLACE(COALESCE(to_json_string(`t0`.`outputs`), ''), '\\', '\\\\'))
)
) AS STRING),
CAST(FARM_FINGERPRINT(
CONCAT(
CONCAT(
CONCAT('\\', REPLACE(COALESCE(`t0`.`hash`, ''), '\\', '\\\\')),
CONCAT('\\', REPLACE(COALESCE(CAST(`t0`.`size` AS STRING), ''), '\\', '\\\\')),
CONCAT('\\', REPLACE(COALESCE(CAST(`t0`.`virtual_size` AS STRING), ''), '\\', '\\\\')),
CONCAT('\\', REPLACE(COALESCE(CAST(`t0`.`version` AS STRING), ''), '\\', '\\\\')),
CONCAT('\\', REPLACE(COALESCE(CAST(`t0`.`lock_time` AS STRING), ''), '\\', '\\\\')),
CONCAT('\\', REPLACE(COALESCE(`t0`.`block_hash`, ''), '\\', '\\\\')),
CONCAT('\\', REPLACE(COALESCE(CAST(`t0`.`block_number` AS STRING), ''), '\\', '\\\\')),
CONCAT('\\', REPLACE(COALESCE(CAST(`t0`.`block_timestamp` AS STRING), ''), '\\', '\\\\')),
CONCAT(
'\\',
REPLACE(COALESCE(CAST(`t0`.`block_timestamp_month` AS STRING), ''), '\\', '\\\\')
),
CONCAT('\\', REPLACE(COALESCE(CAST(`t0`.`input_count` AS STRING), ''), '\\', '\\\\')),
CONCAT('\\', REPLACE(COALESCE(CAST(`t0`.`output_count` AS STRING), ''), '\\', '\\\\')),
CONCAT('\\', REPLACE(COALESCE(CAST(`t0`.`input_value` AS STRING), ''), '\\', '\\\\')),
CONCAT('\\', REPLACE(COALESCE(CAST(`t0`.`output_value` AS STRING), ''), '\\', '\\\\')),
CONCAT('\\', REPLACE(COALESCE(CAST(`t0`.`is_coinbase` AS STRING), ''), '\\', '\\\\')),
CONCAT('\\', REPLACE(COALESCE(CAST(`t0`.`fee` AS STRING), ''), '\\', '\\\\')),
CONCAT('\\', REPLACE(COALESCE(to_json_string(`t0`.`inputs`), ''), '\\', '\\\\')),
CONCAT('\\', REPLACE(COALESCE(to_json_string(`t0`.`outputs`), ''), '\\', '\\\\'))
),
'_'
)
) AS STRING),
CAST(RAND() AS STRING)
) ASC
) - 1 AS `bfuid_col_28`
FROM (
SELECT
`hash`,
`size`,
`virtual_size`,
`version`,
`lock_time`,
`block_hash`,
`block_number`,
`block_timestamp`,
`block_timestamp_month`,
`input_count`,
`output_count`,
`input_value`,
`output_value`,
`is_coinbase`,
`fee`,
`inputs`,
`outputs`
FROM `bigquery-public-data.crypto_bitcoin.transactions` FOR SYSTEM_TIME AS OF TIMESTAMP('2025-10-23T12:47:48.073960+00:00')
) AS `t0`
) AS `t1`
WHERE
`t1`.`bfuid_col_21`)
ORDER BY `bfuid_col_29` ASC NULLS LAST
I have noticed a wild discrepancy between the sql obtained from
DataFrame.sqland the dispatched sql fromDataFrame.to_pandas(dry_run=True). The sql code I get fromDataFrame.sqlis much cleaner and, accordingly to the big query UI, it would consume much less bytes when executed (in my example 156.35 MB). The dispatched sql obtained viaDataFrame.to_pandas(dry_run=True)is much heavier and less optimised, requiring a full table scan (~2TB in my example). As far as I understood, the sql fromDataFrame.sqldoes not rely on any cached table.It would be nice if the dispatched sql would be the optimised one in order to avoid a full table scan on partitioned tables.
Environment details
pip 25.0.1bigframesversion:2.26.0Code example
Outputs
Which gives an estimated processed bytes of 156.35 MB when pasted on the big query editor.
Checking the dispatched sql I get: