-
Notifications
You must be signed in to change notification settings - Fork 1.4k
Reapply "ast.NodeVisitor for import tracking"
#7241
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
5 commits
Select commit
Hold shift + click to select a range
5486051
Reapply "`ast.NodeVisitor` for import tracking (#7229)" (#7230)
ShaharNaveh fb9f477
Fix tests
ShaharNaveh e7d5c9c
Merge remote-tracking branch 'upstream/main' into deps-nodevisitor
ShaharNaveh b0d938d
Fix visit_Call
ShaharNaveh 50ca5c1
Merge remote-tracking branch 'upstream/main' into deps-nodevisitor
ShaharNaveh File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -11,7 +11,6 @@ | |
| import difflib | ||
| import functools | ||
| import pathlib | ||
| import re | ||
| import shelve | ||
| import subprocess | ||
|
|
||
|
|
@@ -32,62 +31,112 @@ | |
| # === Import parsing utilities === | ||
|
|
||
|
|
||
| def _extract_top_level_code(content: str) -> str: | ||
| """Extract only top-level code from Python content for faster parsing.""" | ||
| def_idx = content.find("\ndef ") | ||
| class_idx = content.find("\nclass ") | ||
| class ImportVisitor(ast.NodeVisitor): | ||
| def __init__(self) -> None: | ||
| self.__imports = set() | ||
|
|
||
| indices = [i for i in (def_idx, class_idx) if i != -1] | ||
| if indices: | ||
| content = content[: min(indices)] | ||
| return content.rstrip("\n") | ||
| @property | ||
| def test_imports(self) -> frozenset[str]: | ||
| imports = set() | ||
| for module in self.__imports: | ||
| if not module.startswith("test."): | ||
| continue | ||
| name = module.removeprefix("test.") | ||
|
|
||
| if name == "support" or name.startswith("support."): | ||
| continue | ||
|
|
||
| _FROM_TEST_IMPORT_RE = re.compile(r"^from test import (.+)", re.MULTILINE) | ||
| _FROM_TEST_DOT_RE = re.compile(r"^from test\.(\w+)", re.MULTILINE) | ||
| _IMPORT_TEST_DOT_RE = re.compile(r"^import test\.(\w+)", re.MULTILINE) | ||
| imports.add(name) | ||
|
|
||
| return frozenset(imports) | ||
|
|
||
| def parse_test_imports(content: str) -> set[str]: | ||
| """Parse test file content and extract test package dependencies.""" | ||
| content = _extract_top_level_code(content) | ||
| imports = set() | ||
| @property | ||
| def lib_imports(self) -> frozenset[str]: | ||
| return frozenset( | ||
| # module.split(".", 1)[0] | ||
| module | ||
| for module in self.__imports | ||
| if not module.startswith("test.") | ||
| ) | ||
|
|
||
| for match in _FROM_TEST_IMPORT_RE.finditer(content): | ||
| import_list = match.group(1) | ||
| for part in import_list.split(","): | ||
| name = part.split()[0].strip() | ||
| if name and name not in ("support", "__init__"): | ||
| imports.add(name) | ||
| def visit_Import(self, node): | ||
| for alias in node.names: | ||
| self.__imports.add(alias.name) | ||
|
|
||
| for match in _FROM_TEST_DOT_RE.finditer(content): | ||
| dep = match.group(1) | ||
| if dep not in ("support", "__init__"): | ||
| imports.add(dep) | ||
| def visit_ImportFrom(self, node): | ||
| try: | ||
| module = node.module | ||
| except AttributeError: | ||
| # Ignore `from . import my_internal_module` | ||
| return | ||
|
|
||
| for match in _IMPORT_TEST_DOT_RE.finditer(content): | ||
| dep = match.group(1) | ||
| if dep not in ("support", "__init__"): | ||
| imports.add(dep) | ||
| if module is None: # Ignore `from . import my_internal_module` | ||
| return | ||
|
|
||
| return imports | ||
| for alias in node.names: | ||
| # We only care about what we import if it was from the "test" module | ||
| if module == "test": | ||
| name = f"{module}.{alias.name}" | ||
| else: | ||
| name = module | ||
|
|
||
| self.__imports.add(name) | ||
|
|
||
| _IMPORT_RE = re.compile(r"^import\s+(\w[\w.]*)", re.MULTILINE) | ||
| _FROM_IMPORT_RE = re.compile(r"^from\s+(\w[\w.]*)\s+import", re.MULTILINE) | ||
| def visit_Call(self, node) -> None: | ||
| """ | ||
| In test files, there's sometimes use of: | ||
|
|
||
| ```python | ||
| import test.support | ||
| from test.support import script_helper | ||
|
|
||
| def parse_lib_imports(content: str) -> set[str]: | ||
| """Parse library file and extract all imported module names.""" | ||
| imports = set() | ||
| script = support.findfile("_test_atexit.py") | ||
| script_helper.run_test_script(script) | ||
| ``` | ||
|
|
||
| This imports "_test_atexit.py" but does not show as an import node. | ||
| """ | ||
| func = node.func | ||
| if not isinstance(func, ast.Attribute): | ||
| return | ||
|
|
||
| value = func.value | ||
| if not isinstance(value, ast.Name): | ||
| return | ||
|
|
||
| if (value.id != "support") or (func.attr != "findfile"): | ||
| return | ||
|
|
||
| arg = node.args[0] | ||
| if not isinstance(arg, ast.Constant): | ||
| return | ||
|
|
||
| for match in _IMPORT_RE.finditer(content): | ||
| imports.add(match.group(1)) | ||
| target = arg.value | ||
| if not target.endswith(".py"): | ||
| return | ||
|
|
||
| for match in _FROM_IMPORT_RE.finditer(content): | ||
| imports.add(match.group(1)) | ||
| target = target.removesuffix(".py") | ||
| self.__imports.add(f"test.{target}") | ||
|
|
||
| return imports | ||
|
|
||
| def parse_test_imports(content: str) -> set[str]: | ||
| """Parse test file content and extract test package dependencies.""" | ||
| if not (tree := safe_parse_ast(content)): | ||
| return set() | ||
|
|
||
| visitor = ImportVisitor() | ||
| visitor.visit(tree) | ||
| return visitor.test_imports | ||
|
|
||
|
|
||
| def parse_lib_imports(content: str) -> set[str]: | ||
| """Parse library file and extract all imported module names.""" | ||
| if not (tree := safe_parse_ast(content)): | ||
| return set() | ||
|
|
||
| visitor = ImportVisitor() | ||
| visitor.visit(tree) | ||
| return visitor.lib_imports | ||
|
Comment on lines
+122
to
+139
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Return type annotations don't match actual return types. Both functions declare 🔧 Fix type annotations-def parse_test_imports(content: str) -> set[str]:
+def parse_test_imports(content: str) -> frozenset[str]:
"""Parse test file content and extract test package dependencies."""
if not (tree := safe_parse_ast(content)):
- return set()
+ return frozenset()
visitor = ImportVisitor()
visitor.visit(tree)
return visitor.test_imports
-def parse_lib_imports(content: str) -> set[str]:
+def parse_lib_imports(content: str) -> frozenset[str]:
"""Parse library file and extract all imported module names."""
if not (tree := safe_parse_ast(content)):
- return set()
+ return frozenset()
visitor = ImportVisitor()
visitor.visit(tree)
return visitor.lib_imports🤖 Prompt for AI Agents |
||
|
|
||
|
|
||
| # === TODO marker utilities === | ||
|
|
@@ -104,7 +153,7 @@ def filter_rustpython_todo(content: str) -> str: | |
|
|
||
| def count_rustpython_todo(content: str) -> int: | ||
| """Count lines containing RustPython TODO markers.""" | ||
| return sum(1 for line in content.splitlines() if TODO_MARKER in line) | ||
| return content.count(TODO_MARKER) | ||
|
|
||
|
|
||
| def count_todo_in_path(path: pathlib.Path) -> int: | ||
|
|
@@ -113,10 +162,7 @@ def count_todo_in_path(path: pathlib.Path) -> int: | |
| content = safe_read_text(path) | ||
| return count_rustpython_todo(content) if content else 0 | ||
|
|
||
| total = 0 | ||
| for _, content in read_python_files(path): | ||
| total += count_rustpython_todo(content) | ||
| return total | ||
| return sum(count_rustpython_todo(content) for _, content in read_python_files(path)) | ||
|
|
||
|
|
||
| # === Test utilities === | ||
|
|
||
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Missing bounds check for
node.argscould raiseIndexError.If a malformed call like
support.findfile()(no arguments) exists, accessingnode.args[0]will raise anIndexError, potentially crashing the dependency scan for that file.🛡️ Add defensive check
if (value.id != "support") or (func.attr != "findfile"): return + if not node.args: + return + arg = node.args[0] if not isinstance(arg, ast.Constant): return🤖 Prompt for AI Agents