From 832d6fbca3584767cec914e550e19fd021d53df1 Mon Sep 17 00:00:00 2001 From: Taus Date: Fri, 9 Jan 2026 13:49:17 +0000 Subject: [PATCH 1/6] Prepare `LocalSourceNode` for locality Removes the dependence on the (global) `ModuleVariableNode.getARead()`, by adding a local version (that doesn't include `import *` reads) instead. --- .../python/dataflow/new/internal/DataFlowPublic.qll | 10 +++++++--- .../python/dataflow/new/internal/LocalSources.qll | 4 ++-- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll index ffecbcba57ac..1597553c7563 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll @@ -440,13 +440,17 @@ class ModuleVariableNode extends Node, TModuleVariableNode { /** Gets a node that reads this variable. */ Node getARead() { - result.asCfgNode() = var.getALoad().getAFlowNode() and - // Ignore reads that happen when the module is imported. These are only executed once. - not result.getScope() = mod + result = this.getALocalRead() or this = import_star_read(result) } + /** Gets a node that reads this variable, excluding reads that happen through `from ... import *`. */ + Node getALocalRead() { + result.asCfgNode() = var.getALoad().getAFlowNode() and + not result.getScope() = mod + } + /** Gets an `EssaNode` that corresponds to an assignment of this global variable. */ Node getAWrite() { any(EssaNodeDefinition def).definedBy(var, result.asCfgNode().(DefinitionNode)) diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/LocalSources.qll b/python/ql/lib/semmle/python/dataflow/new/internal/LocalSources.qll index c43a111c9c8b..7752846ae1ff 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/LocalSources.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/LocalSources.qll @@ -67,7 +67,7 @@ class LocalSourceNode extends Node { or // We explicitly include any read of a global variable, as some of these may have local flow going // into them. - this = any(ModuleVariableNode mvn).getARead() + this = any(ModuleVariableNode v).getALocalRead() or // We include all scope entry definitions, as these act as the local source within the scope they // enter. @@ -248,7 +248,7 @@ private module Cached { pragma[nomagic] private predicate localSourceFlowStep(Node nodeFrom, Node nodeTo) { simpleLocalFlowStep(nodeFrom, nodeTo, _) and - not nodeTo = any(ModuleVariableNode v).getARead() + not nodeTo = any(ModuleVariableNode v).getALocalRead() } /** From fc43d222a2cb0557c4006033e79a9eb735738c33 Mon Sep 17 00:00:00 2001 From: Taus Date: Fri, 9 Jan 2026 17:01:11 +0000 Subject: [PATCH 2/6] Remove global restriction on `ModuleVariableNode` This may result in more nodes, but it should still be bounded by the number of global variables in the source code. --- .../python/dataflow/new/internal/DataFlowPublic.qll | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll index 1597553c7563..76782e1f6340 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll @@ -76,15 +76,7 @@ newtype TNode = node.getNode() = any(Comp c).getIterable() } or /** A node representing a global (module-level) variable in a specific module. */ - TModuleVariableNode(Module m, GlobalVariable v) { - v.getScope() = m and - ( - v.escapes() - or - isAccessedThroughImportStar(m) and - ImportStar::globalNameDefinedInModule(v.getId(), m) - ) - } or + TModuleVariableNode(Module m, GlobalVariable v) { v.getScope() = m } or /** * A synthetic node representing that an iterable sequence flows to consumer. */ From cd842447e8aa649146006801ef9d5623ff710c41 Mon Sep 17 00:00:00 2001 From: Taus Date: Fri, 9 Jan 2026 17:12:39 +0000 Subject: [PATCH 3/6] Get rid of unused predicate --- .../lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll | 2 -- 1 file changed, 2 deletions(-) diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll index 76782e1f6340..b2832c8bdb44 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll @@ -462,8 +462,6 @@ class ModuleVariableNode extends Node, TModuleVariableNode { override Location getLocation() { result = mod.getLocation() } } -private predicate isAccessedThroughImportStar(Module m) { m = ImportStar::getStarImported(_) } - private ModuleVariableNode import_star_read(Node n) { resolved_import_star_module(result.getModule(), result.getVariable().getId(), n) } From b0e94e8544fda249c5f7690c755f27b8ca920374 Mon Sep 17 00:00:00 2001 From: Taus Date: Mon, 12 Jan 2026 15:04:14 +0000 Subject: [PATCH 4/6] Fix tests With `ModuleVariableNode`s now appearing for _all_ global variables (not just the ones that actually seem to be used), some of the tests changed a bit. Mostly this was in the form of new flow (because of new nodes that popped into existence). For some inline expectation tests, I opted to instead exclude these results, as there was no suitable location to annotate. For the normal tests, I just accepted the output (after having vetted it carefully, of course). --- .../utils/test/dataflow/MaximalFlowTest.qll | 4 ++- .../experimental/attrs/AttrWrites.expected | 1 + .../dataflow/basic/global.expected | 15 +++++++++++ .../dataflow/basic/globalStep.expected | 4 +++ .../dataflow/basic/local.expected | 5 ++++ .../dataflow/basic/maximalFlows.expected | 5 ++++ .../dataflow/basic/sinks.expected | 5 ++++ .../dataflow/basic/sources.expected | 5 ++++ .../dataflow/global-flow/test.py | 26 +++++++++---------- .../dataflow/typetracking/moduleattr.expected | 1 + .../PoorMansFunctionResolutionTest.ql | 4 ++- 11 files changed, 60 insertions(+), 15 deletions(-) diff --git a/python/ql/lib/utils/test/dataflow/MaximalFlowTest.qll b/python/ql/lib/utils/test/dataflow/MaximalFlowTest.qll index 7587584a269b..5e9831906a4a 100644 --- a/python/ql/lib/utils/test/dataflow/MaximalFlowTest.qll +++ b/python/ql/lib/utils/test/dataflow/MaximalFlowTest.qll @@ -8,7 +8,9 @@ module MaximalFlowTest implements FlowTestSig { predicate relevantFlow(DataFlow::Node source, DataFlow::Node sink) { source != sink and - MaximalFlows::flow(source, sink) + MaximalFlows::flow(source, sink) and + // exclude ModuleVariableNodes (which have location 0:0:0:0) + not sink instanceof DataFlow::ModuleVariableNode } } diff --git a/python/ql/test/experimental/attrs/AttrWrites.expected b/python/ql/test/experimental/attrs/AttrWrites.expected index f8a55824043f..d1fc30b34515 100644 --- a/python/ql/test/experimental/attrs/AttrWrites.expected +++ b/python/ql/test/experimental/attrs/AttrWrites.expected @@ -1,4 +1,5 @@ | test.py:5:9:5:16 | ControlFlowNode for __init__ | test.py:4:1:4:20 | ControlFlowNode for ClassExpr | __init__ | test.py:5:5:5:28 | ControlFlowNode for FunctionExpr | | test.py:6:9:6:16 | ControlFlowNode for Attribute | test.py:6:9:6:12 | ControlFlowNode for self | foo | test.py:6:20:6:22 | ControlFlowNode for foo | +| test.py:9:1:9:9 | ControlFlowNode for Attribute | test.py:0:0:0:0 | ModuleVariableNode in Module test for myobj | foo | test.py:9:13:9:17 | ControlFlowNode for StringLiteral | | test.py:9:1:9:9 | ControlFlowNode for Attribute | test.py:9:1:9:5 | ControlFlowNode for myobj | foo | test.py:9:13:9:17 | ControlFlowNode for StringLiteral | | test.py:12:1:12:25 | ControlFlowNode for setattr() | test.py:12:9:12:13 | ControlFlowNode for myobj | foo | test.py:12:23:12:24 | ControlFlowNode for IntegerLiteral | diff --git a/python/ql/test/library-tests/dataflow/basic/global.expected b/python/ql/test/library-tests/dataflow/basic/global.expected index 7d2c0cab9b92..9e0ef2e6751b 100644 --- a/python/ql/test/library-tests/dataflow/basic/global.expected +++ b/python/ql/test/library-tests/dataflow/basic/global.expected @@ -1,6 +1,9 @@ +| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:0:0:0:0 | ModuleVariableNode in Module test for obfuscated_id | | test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:5:1:17 | ControlFlowNode for obfuscated_id | | test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id | +| test.py:1:5:1:17 | ControlFlowNode for obfuscated_id | test.py:0:0:0:0 | ModuleVariableNode in Module test for obfuscated_id | | test.py:1:5:1:17 | ControlFlowNode for obfuscated_id | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id | +| test.py:1:19:1:19 | ControlFlowNode for x | test.py:0:0:0:0 | ModuleVariableNode in Module test for b | | test.py:1:19:1:19 | ControlFlowNode for x | test.py:2:3:2:3 | ControlFlowNode for y | | test.py:1:19:1:19 | ControlFlowNode for x | test.py:2:7:2:7 | ControlFlowNode for x | | test.py:1:19:1:19 | ControlFlowNode for x | test.py:3:3:3:3 | ControlFlowNode for z | @@ -8,26 +11,33 @@ | test.py:1:19:1:19 | ControlFlowNode for x | test.py:4:10:4:10 | ControlFlowNode for z | | test.py:1:19:1:19 | ControlFlowNode for x | test.py:7:1:7:1 | ControlFlowNode for b | | test.py:1:19:1:19 | ControlFlowNode for x | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | +| test.py:2:3:2:3 | ControlFlowNode for y | test.py:0:0:0:0 | ModuleVariableNode in Module test for b | | test.py:2:3:2:3 | ControlFlowNode for y | test.py:3:3:3:3 | ControlFlowNode for z | | test.py:2:3:2:3 | ControlFlowNode for y | test.py:3:7:3:7 | ControlFlowNode for y | | test.py:2:3:2:3 | ControlFlowNode for y | test.py:4:10:4:10 | ControlFlowNode for z | | test.py:2:3:2:3 | ControlFlowNode for y | test.py:7:1:7:1 | ControlFlowNode for b | | test.py:2:3:2:3 | ControlFlowNode for y | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | +| test.py:2:7:2:7 | ControlFlowNode for x | test.py:0:0:0:0 | ModuleVariableNode in Module test for b | | test.py:2:7:2:7 | ControlFlowNode for x | test.py:2:3:2:3 | ControlFlowNode for y | | test.py:2:7:2:7 | ControlFlowNode for x | test.py:3:3:3:3 | ControlFlowNode for z | | test.py:2:7:2:7 | ControlFlowNode for x | test.py:3:7:3:7 | ControlFlowNode for y | | test.py:2:7:2:7 | ControlFlowNode for x | test.py:4:10:4:10 | ControlFlowNode for z | | test.py:2:7:2:7 | ControlFlowNode for x | test.py:7:1:7:1 | ControlFlowNode for b | | test.py:2:7:2:7 | ControlFlowNode for x | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | +| test.py:3:3:3:3 | ControlFlowNode for z | test.py:0:0:0:0 | ModuleVariableNode in Module test for b | | test.py:3:3:3:3 | ControlFlowNode for z | test.py:4:10:4:10 | ControlFlowNode for z | | test.py:3:3:3:3 | ControlFlowNode for z | test.py:7:1:7:1 | ControlFlowNode for b | | test.py:3:3:3:3 | ControlFlowNode for z | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | +| test.py:3:7:3:7 | ControlFlowNode for y | test.py:0:0:0:0 | ModuleVariableNode in Module test for b | | test.py:3:7:3:7 | ControlFlowNode for y | test.py:3:3:3:3 | ControlFlowNode for z | | test.py:3:7:3:7 | ControlFlowNode for y | test.py:4:10:4:10 | ControlFlowNode for z | | test.py:3:7:3:7 | ControlFlowNode for y | test.py:7:1:7:1 | ControlFlowNode for b | | test.py:3:7:3:7 | ControlFlowNode for y | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | +| test.py:4:10:4:10 | ControlFlowNode for z | test.py:0:0:0:0 | ModuleVariableNode in Module test for b | | test.py:4:10:4:10 | ControlFlowNode for z | test.py:7:1:7:1 | ControlFlowNode for b | | test.py:4:10:4:10 | ControlFlowNode for z | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | +| test.py:6:1:6:1 | ControlFlowNode for a | test.py:0:0:0:0 | ModuleVariableNode in Module test for a | +| test.py:6:1:6:1 | ControlFlowNode for a | test.py:0:0:0:0 | ModuleVariableNode in Module test for b | | test.py:6:1:6:1 | ControlFlowNode for a | test.py:1:19:1:19 | ControlFlowNode for x | | test.py:6:1:6:1 | ControlFlowNode for a | test.py:2:3:2:3 | ControlFlowNode for y | | test.py:6:1:6:1 | ControlFlowNode for a | test.py:2:7:2:7 | ControlFlowNode for x | @@ -37,6 +47,8 @@ | test.py:6:1:6:1 | ControlFlowNode for a | test.py:7:1:7:1 | ControlFlowNode for b | | test.py:6:1:6:1 | ControlFlowNode for a | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | | test.py:6:1:6:1 | ControlFlowNode for a | test.py:7:19:7:19 | ControlFlowNode for a | +| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:0:0:0:0 | ModuleVariableNode in Module test for a | +| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:0:0:0:0 | ModuleVariableNode in Module test for b | | test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:1:19:1:19 | ControlFlowNode for x | | test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:2:3:2:3 | ControlFlowNode for y | | test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:2:7:2:7 | ControlFlowNode for x | @@ -47,7 +59,10 @@ | test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:7:1:7:1 | ControlFlowNode for b | | test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | | test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:7:19:7:19 | ControlFlowNode for a | +| test.py:7:1:7:1 | ControlFlowNode for b | test.py:0:0:0:0 | ModuleVariableNode in Module test for b | +| test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | test.py:0:0:0:0 | ModuleVariableNode in Module test for b | | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | test.py:7:1:7:1 | ControlFlowNode for b | +| test.py:7:19:7:19 | ControlFlowNode for a | test.py:0:0:0:0 | ModuleVariableNode in Module test for b | | test.py:7:19:7:19 | ControlFlowNode for a | test.py:1:19:1:19 | ControlFlowNode for x | | test.py:7:19:7:19 | ControlFlowNode for a | test.py:2:3:2:3 | ControlFlowNode for y | | test.py:7:19:7:19 | ControlFlowNode for a | test.py:2:7:2:7 | ControlFlowNode for x | diff --git a/python/ql/test/library-tests/dataflow/basic/globalStep.expected b/python/ql/test/library-tests/dataflow/basic/globalStep.expected index 00ee53dba003..26d8902e7bbe 100644 --- a/python/ql/test/library-tests/dataflow/basic/globalStep.expected +++ b/python/ql/test/library-tests/dataflow/basic/globalStep.expected @@ -1,5 +1,6 @@ | test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:5:1:17 | ControlFlowNode for obfuscated_id | | test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:5:1:17 | ControlFlowNode for obfuscated_id | +| test.py:1:5:1:17 | ControlFlowNode for obfuscated_id | test.py:0:0:0:0 | ModuleVariableNode in Module test for obfuscated_id | | test.py:1:5:1:17 | ControlFlowNode for obfuscated_id | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id | | test.py:1:19:1:19 | ControlFlowNode for x | test.py:2:3:2:3 | ControlFlowNode for y | | test.py:1:19:1:19 | ControlFlowNode for x | test.py:2:3:2:3 | ControlFlowNode for y | @@ -31,10 +32,13 @@ | test.py:3:7:3:7 | ControlFlowNode for y | test.py:3:3:3:3 | ControlFlowNode for z | | test.py:4:10:4:10 | ControlFlowNode for z | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | | test.py:4:10:4:10 | ControlFlowNode for z | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | +| test.py:6:1:6:1 | ControlFlowNode for a | test.py:0:0:0:0 | ModuleVariableNode in Module test for a | | test.py:6:1:6:1 | ControlFlowNode for a | test.py:7:19:7:19 | ControlFlowNode for a | | test.py:6:1:6:1 | ControlFlowNode for a | test.py:7:19:7:19 | ControlFlowNode for a | | test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:6:1:6:1 | ControlFlowNode for a | | test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:6:1:6:1 | ControlFlowNode for a | +| test.py:7:1:7:1 | ControlFlowNode for b | test.py:0:0:0:0 | ModuleVariableNode in Module test for b | +| test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | test.py:7:1:7:1 | ControlFlowNode for b | | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | test.py:7:1:7:1 | ControlFlowNode for b | | test.py:7:19:7:19 | ControlFlowNode for a | test.py:1:19:1:19 | ControlFlowNode for x | | test.py:7:19:7:19 | ControlFlowNode for a | test.py:1:19:1:19 | ControlFlowNode for x | diff --git a/python/ql/test/library-tests/dataflow/basic/local.expected b/python/ql/test/library-tests/dataflow/basic/local.expected index 142c84015aeb..eb47f1308d42 100644 --- a/python/ql/test/library-tests/dataflow/basic/local.expected +++ b/python/ql/test/library-tests/dataflow/basic/local.expected @@ -1,3 +1,8 @@ +| test.py:0:0:0:0 | ModuleVariableNode in Module test for __name__ | test.py:0:0:0:0 | ModuleVariableNode in Module test for __name__ | +| test.py:0:0:0:0 | ModuleVariableNode in Module test for __package__ | test.py:0:0:0:0 | ModuleVariableNode in Module test for __package__ | +| test.py:0:0:0:0 | ModuleVariableNode in Module test for a | test.py:0:0:0:0 | ModuleVariableNode in Module test for a | +| test.py:0:0:0:0 | ModuleVariableNode in Module test for b | test.py:0:0:0:0 | ModuleVariableNode in Module test for b | +| test.py:0:0:0:0 | ModuleVariableNode in Module test for obfuscated_id | test.py:0:0:0:0 | ModuleVariableNode in Module test for obfuscated_id | | test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | | test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:5:1:17 | ControlFlowNode for obfuscated_id | | test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id | diff --git a/python/ql/test/library-tests/dataflow/basic/maximalFlows.expected b/python/ql/test/library-tests/dataflow/basic/maximalFlows.expected index a9fa5d8da920..421918620455 100644 --- a/python/ql/test/library-tests/dataflow/basic/maximalFlows.expected +++ b/python/ql/test/library-tests/dataflow/basic/maximalFlows.expected @@ -1,7 +1,12 @@ +| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:0:0:0:0 | ModuleVariableNode in Module test for obfuscated_id | | test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id | +| test.py:1:19:1:19 | ControlFlowNode for x | test.py:0:0:0:0 | ModuleVariableNode in Module test for b | | test.py:1:19:1:19 | ControlFlowNode for x | test.py:4:10:4:10 | ControlFlowNode for z | | test.py:1:19:1:19 | ControlFlowNode for x | test.py:7:1:7:1 | ControlFlowNode for b | +| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:0:0:0:0 | ModuleVariableNode in Module test for a | +| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:0:0:0:0 | ModuleVariableNode in Module test for b | | test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:4:10:4:10 | ControlFlowNode for z | | test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:7:1:7:1 | ControlFlowNode for b | | test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:7:19:7:19 | ControlFlowNode for a | +| test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | test.py:0:0:0:0 | ModuleVariableNode in Module test for b | | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | test.py:7:1:7:1 | ControlFlowNode for b | diff --git a/python/ql/test/library-tests/dataflow/basic/sinks.expected b/python/ql/test/library-tests/dataflow/basic/sinks.expected index bf5800e0b73f..34525d5043e1 100644 --- a/python/ql/test/library-tests/dataflow/basic/sinks.expected +++ b/python/ql/test/library-tests/dataflow/basic/sinks.expected @@ -1,3 +1,8 @@ +| test.py:0:0:0:0 | ModuleVariableNode in Module test for __name__ | +| test.py:0:0:0:0 | ModuleVariableNode in Module test for __package__ | +| test.py:0:0:0:0 | ModuleVariableNode in Module test for a | +| test.py:0:0:0:0 | ModuleVariableNode in Module test for b | +| test.py:0:0:0:0 | ModuleVariableNode in Module test for obfuscated_id | | test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | | test.py:1:1:1:21 | SynthDictSplatParameterNode | | test.py:1:5:1:17 | ControlFlowNode for obfuscated_id | diff --git a/python/ql/test/library-tests/dataflow/basic/sources.expected b/python/ql/test/library-tests/dataflow/basic/sources.expected index bf5800e0b73f..34525d5043e1 100644 --- a/python/ql/test/library-tests/dataflow/basic/sources.expected +++ b/python/ql/test/library-tests/dataflow/basic/sources.expected @@ -1,3 +1,8 @@ +| test.py:0:0:0:0 | ModuleVariableNode in Module test for __name__ | +| test.py:0:0:0:0 | ModuleVariableNode in Module test for __package__ | +| test.py:0:0:0:0 | ModuleVariableNode in Module test for a | +| test.py:0:0:0:0 | ModuleVariableNode in Module test for b | +| test.py:0:0:0:0 | ModuleVariableNode in Module test for obfuscated_id | | test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | | test.py:1:1:1:21 | SynthDictSplatParameterNode | | test.py:1:5:1:17 | ControlFlowNode for obfuscated_id | diff --git a/python/ql/test/library-tests/dataflow/global-flow/test.py b/python/ql/test/library-tests/dataflow/global-flow/test.py index ab2da1c2959d..0b62e47c71fb 100644 --- a/python/ql/test/library-tests/dataflow/global-flow/test.py +++ b/python/ql/test/library-tests/dataflow/global-flow/test.py @@ -8,9 +8,9 @@ g1, g2 = [6], [7] # $writes=g1 writes=g2 -# Assignment that's only referenced in this scope. This one will not give rise to a `ModuleVariableNode`. +# Assignment that's only referenced in this scope. -unreferenced_g = [8] +unreferenced_g = [8] # $writes=unreferenced_g print(unreferenced_g) # Testing modifications of globals @@ -34,7 +34,7 @@ # A global with multiple potential definitions -import unknown_module +import unknown_module # $writes=unknown_module if unknown_module.attr: g_mult = [200] # $writes=g_mult else: @@ -46,7 +46,7 @@ if unknown_module.attr: g_redef = [500] # $writes=g_redef -def global_access(): +def global_access(): # $writes=global_access l = 5 print(g) # $reads=g print(g1) # $reads=g1 @@ -59,12 +59,12 @@ def global_access(): def print_g_mod(): # $writes=print_g_mod print(g_mod) # $reads=g_mod -def global_mod(): +def global_mod(): # $writes=global_mod global g_mod g_mod += [150] # $reads,writes=g_mod print_g_mod() # $reads=print_g_mod -def global_inside_local_function(): +def global_inside_local_function(): # $writes=global_inside_local_function def local_function(): print(g) # $reads=g local_function() @@ -76,21 +76,21 @@ def local_function(): import foo_module # $writes=foo_module -def use_foo(): +def use_foo(): # $writes=use_foo print(foo_module.attr) # $reads=foo_module # Partial imports from bar import baz_attr, quux_attr # $writes=baz_attr writes=quux_attr -def use_partial_import(): +def use_partial_import(): # $writes=use_partial_import print(baz_attr, quux_attr) # $reads=baz_attr reads=quux_attr # Aliased imports from spam_module import ham_attr as eggs_attr # $writes=eggs_attr -def use_aliased_import(): +def use_aliased_import(): # $writes=use_aliased_import print(eggs_attr) # $reads=eggs_attr # Import star (unlikely to work unless we happen to extract/model the referenced module) @@ -99,23 +99,23 @@ def use_aliased_import(): from unknown import * -def secretly_use_unknown(): +def secretly_use_unknown(): # $writes=secretly_use_unknown print(unknown_attr) # $reads=unknown_attr # Known modules from known import * -def secretly_use_known(): +def secretly_use_known(): # $writes=secretly_use_known print(known_attr) # $reads=known_attr # Local import in function -def imports_locally(): +def imports_locally(): # $writes=imports_locally import mod1 # Global import hidden in function -def imports_stuff(): +def imports_stuff(): # $writes=imports_stuff global mod2 import mod2 # $writes=mod2 diff --git a/python/ql/test/library-tests/dataflow/typetracking/moduleattr.expected b/python/ql/test/library-tests/dataflow/typetracking/moduleattr.expected index ff9673aaaea8..06b560623929 100644 --- a/python/ql/test/library-tests/dataflow/typetracking/moduleattr.expected +++ b/python/ql/test/library-tests/dataflow/typetracking/moduleattr.expected @@ -2,6 +2,7 @@ module_tracker | import_as_attr.py:1:6:1:11 | ControlFlowNode for ImportExpr | module_attr_tracker | import_as_attr.py:0:0:0:0 | ModuleVariableNode in Module import_as_attr for attr_ref | +| import_as_attr.py:0:0:0:0 | ModuleVariableNode in Module import_as_attr for x | | import_as_attr.py:1:20:1:35 | ControlFlowNode for ImportMember | | import_as_attr.py:1:28:1:35 | ControlFlowNode for attr_ref | | import_as_attr.py:3:1:3:1 | ControlFlowNode for x | diff --git a/python/ql/test/library-tests/frameworks/internal-ql-helpers/PoorMansFunctionResolutionTest.ql b/python/ql/test/library-tests/frameworks/internal-ql-helpers/PoorMansFunctionResolutionTest.ql index b0325b027c3a..b9575e43493e 100644 --- a/python/ql/test/library-tests/frameworks/internal-ql-helpers/PoorMansFunctionResolutionTest.ql +++ b/python/ql/test/library-tests/frameworks/internal-ql-helpers/PoorMansFunctionResolutionTest.ql @@ -17,7 +17,9 @@ module InlinePoorMansFunctionResolutionTest implements TestSig { ) and // exclude decorator calls (which with our extractor rewrites does reference the // function) - not ref.asExpr() = func.getDefinition().(FunctionExpr).getADecoratorCall() + not ref.asExpr() = func.getDefinition().(FunctionExpr).getADecoratorCall() and + // exclude ModuleVariableNodes (which have location 0:0:0:0) + not ref instanceof DataFlow::ModuleVariableNode | value = func.getName() and tag = "resolved" and From e0437b68187b8293e4d43ed31ca9e454eff440bf Mon Sep 17 00:00:00 2001 From: Taus Date: Mon, 26 Jan 2026 13:28:35 +0000 Subject: [PATCH 5/6] Python: Make `ExtractedArgumentNode` local Explicitly adds a bunch of nodes that were previously (using a global analysis) identified as `ExtractedArgumentNode`s. --- .../dataflow/new/internal/DataFlowPublic.qll | 45 ++++++++++++++----- 1 file changed, 34 insertions(+), 11 deletions(-) diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll index b2832c8bdb44..216ade645764 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll @@ -339,27 +339,50 @@ abstract class ArgumentNode extends Node { final ExtractedDataFlowCall getCall() { this.argumentOf(result, _) } } +/** Gets an overapproximation of the argument nodes that are included in `getCallArg` */ +Node getCallArgApproximation() { + // pre-update nodes for calls + result = any(CallCfgNode c).(PostUpdateNode).getPreUpdateNode() + or + // self parameters in methods + exists(Class c | result.asExpr() = c.getAMethod().getArg(0)) + or + // the object part of an attribute expression (which might be a bound method) + result.asCfgNode() = any(AttrNode a).getObject() + or + // the function part of any call + result.asCfgNode() = any(CallNode c).getFunction() +} + +private Node otherArgs() { + // for potential summaries we allow all normal call arguments + normalCallArg(_, result, _) + or + // and self arguments + result.asCfgNode() = any(CallNode c).getFunction().(AttrNode).getObject() + or + // for comprehensions, we allow the synthetic `iterable` argument + result.asExpr() = any(Comp c).getIterable() +} + /** * A data flow node that represents a call argument found in the source code. */ class ExtractedArgumentNode extends ArgumentNode { ExtractedArgumentNode() { - // for resolved calls, we need to allow all argument nodes - getCallArg(_, _, _, this, _) - or - // for potential summaries we allow all normal call arguments - normalCallArg(_, this, _) + this = getCallArgApproximation() or - // and self arguments - this.asCfgNode() = any(CallNode c).getFunction().(AttrNode).getObject() - or - // for comprehensions, we allow the synthetic `iterable` argument - this.asExpr() = any(Comp c).getIterable() + this = otherArgs() } final override predicate argumentOf(DataFlowCall call, ArgumentPosition pos) { this = call.getArgument(pos) and - call instanceof ExtractedDataFlowCall + call instanceof ExtractedDataFlowCall and + ( + this = otherArgs() + or + this = getCallArgApproximation() and getCallArg(_, _, _, this, _) + ) } } From 897bbbcd4a35647d2a8b80968b5b3be9c5b020c6 Mon Sep 17 00:00:00 2001 From: Taus Date: Mon, 26 Jan 2026 15:38:25 +0000 Subject: [PATCH 6/6] Python: Fix test issues --- python/ql/consistency-queries/DataFlowConsistency.ql | 10 ++++++++++ python/ql/lib/utils/test/dataflow/MaximalFlowTest.qll | 2 +- python/ql/lib/utils/test/dataflow/callGraphConfig.qll | 2 +- .../InitCallsSubclass/InitCallsSubclassMethod.ql | 3 ++- 4 files changed, 14 insertions(+), 3 deletions(-) diff --git a/python/ql/consistency-queries/DataFlowConsistency.ql b/python/ql/consistency-queries/DataFlowConsistency.ql index 759db3d19a9c..62bbb3062c29 100644 --- a/python/ql/consistency-queries/DataFlowConsistency.ql +++ b/python/ql/consistency-queries/DataFlowConsistency.ql @@ -26,6 +26,8 @@ private module Input implements InputSig { or // TODO: Implement post-updates for **kwargs, see tests added in https://github.com/github/codeql/pull/14936 exists(ArgumentPosition apos | n.argumentOf(_, apos) and apos.isDictSplat()) + or + missingArgumentCallExclude(n) } predicate reverseReadExclude(Node n) { @@ -134,6 +136,14 @@ private module Input implements InputSig { other.getNode().getScope() = f ) } + + predicate missingArgumentCallExclude(ArgumentNode arg) { + // We overapproximate the argument nodes in order to not rely on the global `getCallArg` + // predicate. + // Because of this, we must exclude the cases where we have an approximation but no actual + // argument node. + arg = getCallArgApproximation() and not getCallArg(_, _, _, arg, _) + } } import MakeConsistency diff --git a/python/ql/lib/utils/test/dataflow/MaximalFlowTest.qll b/python/ql/lib/utils/test/dataflow/MaximalFlowTest.qll index 5e9831906a4a..cbd3b4c6aa51 100644 --- a/python/ql/lib/utils/test/dataflow/MaximalFlowTest.qll +++ b/python/ql/lib/utils/test/dataflow/MaximalFlowTest.qll @@ -35,7 +35,7 @@ module MaximalFlowsConfig implements DataFlow::ConfigSig { predicate isSink(DataFlow::Node node) { exists(node.getLocation().getFile().getRelativePath()) and not any(CallNode c).getArg(_) = node.asCfgNode() and - not node instanceof DataFlow::ArgumentNode and + not isArgumentNode(node, _, _) and not node.asCfgNode().(NameNode).getId().matches("SINK%") and not DataFlow::localFlowStep(node, _) } diff --git a/python/ql/lib/utils/test/dataflow/callGraphConfig.qll b/python/ql/lib/utils/test/dataflow/callGraphConfig.qll index 8528396a12f2..85ecb9b701db 100644 --- a/python/ql/lib/utils/test/dataflow/callGraphConfig.qll +++ b/python/ql/lib/utils/test/dataflow/callGraphConfig.qll @@ -9,7 +9,7 @@ module CallGraphConfig implements DataFlow::ConfigSig { predicate isSource(DataFlow::Node node) { node instanceof DataFlowPrivate::ReturnNode or - node instanceof DataFlow::ArgumentNode + DataFlowPrivate::isArgumentNode(node, _, _) } predicate isSink(DataFlow::Node node) { diff --git a/python/ql/src/Classes/InitCallsSubclass/InitCallsSubclassMethod.ql b/python/ql/src/Classes/InitCallsSubclass/InitCallsSubclassMethod.ql index 32eb5ffe79e5..4c1b3247d96f 100644 --- a/python/ql/src/Classes/InitCallsSubclass/InitCallsSubclassMethod.ql +++ b/python/ql/src/Classes/InitCallsSubclass/InitCallsSubclassMethod.ql @@ -15,6 +15,7 @@ import python import semmle.python.dataflow.new.DataFlow import semmle.python.dataflow.new.internal.DataFlowDispatch +import semmle.python.dataflow.new.internal.DataFlowPrivate predicate initSelfCallOverridden( Function init, DataFlow::Node self, DataFlow::MethodCallNode call, Function target, @@ -39,7 +40,7 @@ predicate readsFromSelf(Function method) { self.getParameter() = method.getArg(0) and DataFlow::localFlow(self, sink) | - sink instanceof DataFlow::ArgumentNode + isArgumentNode(sink, _, _) or sink = any(DataFlow::AttrRead a).getObject() )