refactor(bigframes): Add basic disassembly to IR compiler#17320
refactor(bigframes): Add basic disassembly to IR compiler#17320TrevorBergeron wants to merge 2 commits into
Conversation
There was a problem hiding this comment.
Code Review
This pull request introduces bytecode compilation to BigQuery expressions, along with custom expression classes and unit tests. The reviewer feedback highlights several opportunities for improvement, including handling variables explicitly set to None in LOAD_GLOBAL, refactoring repetitive blocks for older Python binary operations using a module-level mapping, guarding against a potential AttributeError when op_symbol is None in BINARY_OP, and fixing typos and copy-paste errors in exception messages.
| elif opname == "LOAD_GLOBAL": | ||
| name = inst.argval | ||
| val = None | ||
| if name in closure_dict: | ||
| val = closure_dict[name] | ||
| elif name in globals_dict: | ||
| val = globals_dict[name] | ||
| elif name in builtins_dict: | ||
| val = builtins_dict[name] | ||
|
|
||
| if isinstance(val, ModuleType): | ||
| stack.append(py_exprs.Module(val)) | ||
| elif val is not None: | ||
| stack.append(py_exprs.PyObject(val)) | ||
| else: | ||
| stack.append(expression.UnboundVariableExpression(name)) |
There was a problem hiding this comment.
If a global or closure variable is explicitly set to None, it is currently resolved as an unbound variable instead of a constant None. We should use a boolean flag or sentinel to check if the variable was found.
elif opname == "LOAD_GLOBAL":
name = inst.argval
found = False
val = None
if name in closure_dict:
val = closure_dict[name]
found = True
elif name in globals_dict:
val = globals_dict[name]
found = True
elif name in builtins_dict:
val = builtins_dict[name]
found = True
if found:
if isinstance(val, ModuleType):
stack.append(py_exprs.Module(val))
else:
stack.append(py_exprs.PyObject(val))
else:
stack.append(expression.UnboundVariableExpression(name))| _COMPARE_OP_MAP = { | ||
| "==": operator.eq, | ||
| "!=": operator.ne, | ||
| "<": operator.lt, | ||
| "<=": operator.le, | ||
| ">": operator.gt, | ||
| ">=": operator.ge, | ||
| } |
There was a problem hiding this comment.
Define a mapping for older Python binary/inplace opcodes to their corresponding operator functions at the module level to avoid repetitive code blocks and PEP 8 violations.
_COMPARE_OP_MAP = {
"==": operator.eq,
"!=": operator.ne,
"<": operator.lt,
"<=": operator.le,
">–": operator.gt,
">=": operator.ge,
}
_OLD_BINARY_OP_MAP = {
"BINARY_ADD": operator.add,
"INPLACE_ADD": operator.add,
"BINARY_SUBTRACT": operator.sub,
"INPLACE_SUBTRACT": operator.sub,
"BINARY_MULTIPLY": operator.mul,
"INPLACE_MULTIPLY": operator.mul,
"BINARY_TRUE_DIVIDE": operator.truediv,
"INPLACE_TRUE_DIVIDE": operator.truediv,
"BINARY_FLOOR_DIVIDE": operator.floordiv,
"INPLACE_FLOOR_DIVIDE": operator.floordiv,
"BINARY_MODULO": operator.mod,
"INPLACE_MODULO": operator.mod,
"BINARY_POWER": operator.pow,
"INPLACE_POWER": operator.pow,
}| op_symbol = inst.argrepr | ||
| if not op_symbol and isinstance(inst.argval, str): | ||
| op_symbol = inst.argval | ||
| if op_symbol.endswith("="): | ||
| op_symbol = op_symbol[:-1] |
There was a problem hiding this comment.
If op_symbol is None, calling endswith will raise an AttributeError. We should guard it by checking if op_symbol is truthy.
| op_symbol = inst.argrepr | |
| if not op_symbol and isinstance(inst.argval, str): | |
| op_symbol = inst.argval | |
| if op_symbol.endswith("="): | |
| op_symbol = op_symbol[:-1] | |
| op_symbol = inst.argrepr | |
| if not op_symbol and isinstance(inst.argval, str): | |
| op_symbol = inst.argval | |
| if op_symbol and op_symbol.endswith("="): | |
| op_symbol = op_symbol[:-1] |
| elif opname in ("BINARY_ADD", "INPLACE_ADD"): | ||
| if len(stack) < 2: return None | ||
| right = stack.pop(); left = stack.pop() | ||
| stack.append(py_exprs.Call(py_exprs.PyObject(operator.add), (left, right))) | ||
| elif opname in ("BINARY_SUBTRACT", "INPLACE_SUBTRACT"): | ||
| if len(stack) < 2: return None | ||
| right = stack.pop(); left = stack.pop() | ||
| stack.append(py_exprs.Call(py_exprs.PyObject(operator.sub), (left, right))) | ||
| elif opname in ("BINARY_MULTIPLY", "INPLACE_MULTIPLY"): | ||
| if len(stack) < 2: return None | ||
| right = stack.pop(); left = stack.pop() | ||
| stack.append(py_exprs.Call(py_exprs.PyObject(operator.mul), (left, right))) | ||
| elif opname in ("BINARY_TRUE_DIVIDE", "INPLACE_TRUE_DIVIDE"): | ||
| if len(stack) < 2: return None | ||
| right = stack.pop(); left = stack.pop() | ||
| stack.append(py_exprs.Call(py_exprs.PyObject(operator.truediv), (left, right))) | ||
| elif opname in ("BINARY_FLOOR_DIVIDE", "INPLACE_FLOOR_DIVIDE"): | ||
| if len(stack) < 2: return None | ||
| right = stack.pop(); left = stack.pop() | ||
| stack.append(py_exprs.Call(py_exprs.PyObject(operator.floordiv), (left, right))) | ||
| elif opname in ("BINARY_MODULO", "INPLACE_MODULO"): | ||
| if len(stack) < 2: return None | ||
| right = stack.pop(); left = stack.pop() | ||
| stack.append(py_exprs.Call(py_exprs.PyObject(operator.mod), (left, right))) | ||
| elif opname in ("BINARY_POWER", "INPLACE_POWER"): | ||
| if len(stack) < 2: return None | ||
| right = stack.pop(); left = stack.pop() | ||
| stack.append(py_exprs.Call(py_exprs.PyObject(operator.pow), (left, right))) |
There was a problem hiding this comment.
Simplify the repetitive blocks for older Python versions by using the _OLD_BINARY_OP_MAP lookup. This also avoids multiple statements on a single line, adhering to PEP 8.
| elif opname in ("BINARY_ADD", "INPLACE_ADD"): | |
| if len(stack) < 2: return None | |
| right = stack.pop(); left = stack.pop() | |
| stack.append(py_exprs.Call(py_exprs.PyObject(operator.add), (left, right))) | |
| elif opname in ("BINARY_SUBTRACT", "INPLACE_SUBTRACT"): | |
| if len(stack) < 2: return None | |
| right = stack.pop(); left = stack.pop() | |
| stack.append(py_exprs.Call(py_exprs.PyObject(operator.sub), (left, right))) | |
| elif opname in ("BINARY_MULTIPLY", "INPLACE_MULTIPLY"): | |
| if len(stack) < 2: return None | |
| right = stack.pop(); left = stack.pop() | |
| stack.append(py_exprs.Call(py_exprs.PyObject(operator.mul), (left, right))) | |
| elif opname in ("BINARY_TRUE_DIVIDE", "INPLACE_TRUE_DIVIDE"): | |
| if len(stack) < 2: return None | |
| right = stack.pop(); left = stack.pop() | |
| stack.append(py_exprs.Call(py_exprs.PyObject(operator.truediv), (left, right))) | |
| elif opname in ("BINARY_FLOOR_DIVIDE", "INPLACE_FLOOR_DIVIDE"): | |
| if len(stack) < 2: return None | |
| right = stack.pop(); left = stack.pop() | |
| stack.append(py_exprs.Call(py_exprs.PyObject(operator.floordiv), (left, right))) | |
| elif opname in ("BINARY_MODULO", "INPLACE_MODULO"): | |
| if len(stack) < 2: return None | |
| right = stack.pop(); left = stack.pop() | |
| stack.append(py_exprs.Call(py_exprs.PyObject(operator.mod), (left, right))) | |
| elif opname in ("BINARY_POWER", "INPLACE_POWER"): | |
| if len(stack) < 2: return None | |
| right = stack.pop(); left = stack.pop() | |
| stack.append(py_exprs.Call(py_exprs.PyObject(operator.pow), (left, right))) | |
| elif opname in _OLD_BINARY_OP_MAP: | |
| if len(stack) < 2: | |
| return None | |
| right = stack.pop() | |
| left = stack.pop() | |
| stack.append(py_exprs.Call(py_exprs.PyObject(_OLD_BINARY_OP_MAP[opname]), (left, right))) |
| @property | ||
| def output_type(self) -> dtypes.ExpressionType: | ||
| raise ValueError("Module expresion has not type") |
There was a problem hiding this comment.
Fix the typo ('expresion') and grammatical error in the exception message.
| @property | |
| def output_type(self) -> dtypes.ExpressionType: | |
| raise ValueError("Module expresion has not type") | |
| @property | |
| def output_type(self) -> dtypes.ExpressionType: | |
| raise ValueError("Module expression does not have a type.") |
| @property | ||
| def output_type(self) -> dtypes.ExpressionType: | ||
| raise ValueError("Module expresion has not type") |
There was a problem hiding this comment.
Fix the copy-paste error and typo in the exception message. It should refer to PyObject instead of Module.
| @property | |
| def output_type(self) -> dtypes.ExpressionType: | |
| raise ValueError("Module expresion has not type") | |
| @property | |
| def output_type(self) -> dtypes.ExpressionType: | |
| raise ValueError("PyObject expression does not have a type.") |
Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly:
Fixes #<issue_number_goes_here> 🦕