diff --git a/python/ql/src/experimental/CWE-091/Xslt.qhelp b/python/ql/src/experimental/CWE-091/Xslt.qhelp new file mode 100644 index 000000000000..d35da4e82d49 --- /dev/null +++ b/python/ql/src/experimental/CWE-091/Xslt.qhelp @@ -0,0 +1,18 @@ + + + +

+ Processing an unvalidated XSL stylesheet can allow an attacker to change the structure and contents of the resultant XML, include arbitrary files from the file system, or execute arbitrary code. +

+
+ +

+ This vulnerability can be prevented by not allowing untrusted user input to be passed as an XSL stylesheet. + If the application logic necessiates processing untrusted XSL stylesheets, the input should be properly filtered and sanitized before use. +

+
+ +

In the example below, the XSL stylesheet is controlled by the user and hence leads to a vulnerability.

+ +
+
diff --git a/python/ql/src/experimental/CWE-091/Xslt.ql b/python/ql/src/experimental/CWE-091/Xslt.ql new file mode 100644 index 000000000000..aa35d92c01a7 --- /dev/null +++ b/python/ql/src/experimental/CWE-091/Xslt.ql @@ -0,0 +1,35 @@ +/** + * @name XSLT query built from user-controlled sources + * @description Building a XSLT query from user-controlled sources is vulnerable to insertion of + * malicious XSLT code by the user. + * @kind path-problem + * @problem.severity error + * @precision high + * @id py/xslt-injection + * @tags security + * external/cwe/cwe-643 + */ + +import python +import semmle.python.security.Paths +/* Sources */ +import semmle.python.web.HttpRequest +/* Sinks */ +import experimental.semmle.python.security.injection.XSLT + +class XSLTInjectionConfiguration extends TaintTracking::Configuration { + XSLTInjectionConfiguration() { this = "XSLT injection configuration" } + + override predicate isSource(TaintTracking::Source source) { + source instanceof HttpRequestTaintSource + } + + override predicate isSink(TaintTracking::Sink sink) { + sink instanceof XSLTInjection::XSLTInjectionSink + } +} + +from XSLTInjectionConfiguration config, TaintedPathSource src, TaintedPathSink sink +where config.hasFlowPath(src, sink) +select sink.getSink(), src, sink, "This XSLT query depends on $@.", src.getSource(), + "a user-provided value" diff --git a/python/ql/src/experimental/CWE-091/xslt.py b/python/ql/src/experimental/CWE-091/xslt.py new file mode 100644 index 000000000000..1655916c7e06 --- /dev/null +++ b/python/ql/src/experimental/CWE-091/xslt.py @@ -0,0 +1,14 @@ +from lxml import etree +from io import StringIO +from flask import Flask, request + +app = Flask(__name__) + + +@app.route("/xslt") +def bad(): + xsltQuery = request.args.get('xml', '') + xslt_root = etree.XML(xsltQuery) + f = StringIO('') + tree = etree.parse(f) + result_tree = tree.xslt(xslt_root) # Not OK diff --git a/python/ql/src/experimental/semmle/python/security/injection/XSLT.qll b/python/ql/src/experimental/semmle/python/security/injection/XSLT.qll new file mode 100644 index 000000000000..d4f2814df97d --- /dev/null +++ b/python/ql/src/experimental/semmle/python/security/injection/XSLT.qll @@ -0,0 +1,115 @@ +/** + * Provides class and predicates to track external data that + * may represent malicious XSLT query objects. + * + * This module is intended to be imported into a taint-tracking query + * to extend `TaintKind` and `TaintSink`. + */ + +import python +import semmle.python.dataflow.TaintTracking +import semmle.python.web.HttpRequest + +/** Models XSLT Injection related classes and functions */ +module XSLTInjection { + /** Returns a class value which refers to `lxml.etree` */ + Value etree() { result = Value::named("lxml.etree") } + + /** A generic taint sink that is vulnerable to XSLT injection. */ + abstract class XSLTInjectionSink extends TaintSink { } + + /** + * A kind of "taint", representing an untrusted XML string + */ + private class ExternalXmlStringKind extends ExternalStringKind { + ExternalXmlStringKind() { this = "etree.XML string" } + + override TaintKind getTaintForFlowStep(ControlFlowNode fromnode, ControlFlowNode tonode) { + etreeXML(fromnode, tonode) and result instanceof ExternalXmlKind + or + etreeFromStringList(fromnode, tonode) and result instanceof ExternalXmlKind + or + etreeFromString(fromnode, tonode) and result instanceof ExternalXmlKind + } + } + + /** + * A kind of "taint", representing a XML encoded string + */ + class ExternalXmlKind extends TaintKind { + ExternalXmlKind() { this = "lxml etree xml" } + } + + private predicate etreeXML(ControlFlowNode fromnode, CallNode tonode) { + // etree.XML("") + exists(CallNode call | call.getFunction().(AttrNode).getObject("XML").pointsTo(etree()) | + call.getArg(0) = fromnode and + call = tonode + ) + } + + private predicate etreeFromString(ControlFlowNode fromnode, CallNode tonode) { + // etree.fromstring(text, parser=None) + exists(CallNode call | call.getFunction().(AttrNode).getObject("fromstring").pointsTo(etree()) | + call.getArg(0) = fromnode and + call = tonode + ) + } + + private predicate etreeFromStringList(ControlFlowNode fromnode, CallNode tonode) { + // etree.fromstringlist(strings, parser=None) + exists(CallNode call | + call.getFunction().(AttrNode).getObject("fromstringlist").pointsTo(etree()) + | + call.getArg(0) = fromnode and + call = tonode + ) + } + + /** + * A Sink representing an argument to the `etree.XSLT` call. + * + * from lxml import etree + * root = etree.XML("") + * find_text = etree.XSLT("`sink`") + */ + private class EtreeXSLTArgument extends XSLTInjectionSink { + override string toString() { result = "lxml.etree.XSLT" } + + EtreeXSLTArgument() { + exists(CallNode call | call.getFunction().(AttrNode).getObject("XSLT").pointsTo(etree()) | + call.getArg(0) = this + ) + } + + override predicate sinks(TaintKind kind) { kind instanceof ExternalXmlKind } + } + + /** + * A Sink representing an argument to the `XSLT` call to a parsed xml document. + * + * from lxml import etree + * from io import StringIO + * `sink` = etree.XML(xsltQuery) + * tree = etree.parse(f) + * result_tree = tree.xslt(`sink`) + */ + private class ParseXSLTArgument extends XSLTInjectionSink { + override string toString() { result = "lxml.etree.parse.xslt" } + + ParseXSLTArgument() { + exists( + CallNode parseCall, CallNode xsltCall, ControlFlowNode obj, Variable var, AssignStmt assign + | + parseCall.getFunction().(AttrNode).getObject("parse").pointsTo(etree()) and + assign.getValue().(Call).getAFlowNode() = parseCall and + xsltCall.getFunction().(AttrNode).getObject("xslt") = obj and + var.getAUse() = obj and + assign.getATarget() = var.getAStore() and + xsltCall.getArg(0) = this + ) + } + + override predicate sinks(TaintKind kind) { kind instanceof ExternalXmlKind } + } +} diff --git a/python/ql/test/experimental/CWE-091/Xslt.expected b/python/ql/test/experimental/CWE-091/Xslt.expected new file mode 100644 index 000000000000..89f19160f697 --- /dev/null +++ b/python/ql/test/experimental/CWE-091/Xslt.expected @@ -0,0 +1,47 @@ +edges +| xslt.py:10:17:10:28 | dict of etree.XML string | xslt.py:10:17:10:43 | etree.XML string | +| xslt.py:10:17:10:28 | dict of etree.XML string | xslt.py:10:17:10:43 | etree.XML string | +| xslt.py:10:17:10:43 | etree.XML string | xslt.py:11:27:11:35 | etree.XML string | +| xslt.py:10:17:10:43 | etree.XML string | xslt.py:11:27:11:35 | etree.XML string | +| xslt.py:11:17:11:36 | lxml etree xml | xslt.py:14:29:14:37 | lxml etree xml | +| xslt.py:11:17:11:36 | lxml etree xml | xslt.py:14:29:14:37 | lxml etree xml | +| xslt.py:11:27:11:35 | etree.XML string | xslt.py:11:17:11:36 | lxml etree xml | +| xslt.py:11:27:11:35 | etree.XML string | xslt.py:11:17:11:36 | lxml etree xml | +| xsltInjection.py:10:17:10:28 | dict of etree.XML string | xsltInjection.py:10:17:10:43 | etree.XML string | +| xsltInjection.py:10:17:10:28 | dict of etree.XML string | xsltInjection.py:10:17:10:43 | etree.XML string | +| xsltInjection.py:10:17:10:43 | etree.XML string | xsltInjection.py:11:27:11:35 | etree.XML string | +| xsltInjection.py:10:17:10:43 | etree.XML string | xsltInjection.py:11:27:11:35 | etree.XML string | +| xsltInjection.py:11:17:11:36 | lxml etree xml | xsltInjection.py:12:28:12:36 | lxml etree xml | +| xsltInjection.py:11:17:11:36 | lxml etree xml | xsltInjection.py:12:28:12:36 | lxml etree xml | +| xsltInjection.py:11:27:11:35 | etree.XML string | xsltInjection.py:11:17:11:36 | lxml etree xml | +| xsltInjection.py:11:27:11:35 | etree.XML string | xsltInjection.py:11:17:11:36 | lxml etree xml | +| xsltInjection.py:17:17:17:28 | dict of etree.XML string | xsltInjection.py:17:17:17:43 | etree.XML string | +| xsltInjection.py:17:17:17:28 | dict of etree.XML string | xsltInjection.py:17:17:17:43 | etree.XML string | +| xsltInjection.py:17:17:17:43 | etree.XML string | xsltInjection.py:18:27:18:35 | etree.XML string | +| xsltInjection.py:17:17:17:43 | etree.XML string | xsltInjection.py:18:27:18:35 | etree.XML string | +| xsltInjection.py:18:17:18:36 | lxml etree xml | xsltInjection.py:21:29:21:37 | lxml etree xml | +| xsltInjection.py:18:17:18:36 | lxml etree xml | xsltInjection.py:21:29:21:37 | lxml etree xml | +| xsltInjection.py:18:27:18:35 | etree.XML string | xsltInjection.py:18:17:18:36 | lxml etree xml | +| xsltInjection.py:18:27:18:35 | etree.XML string | xsltInjection.py:18:17:18:36 | lxml etree xml | +| xsltInjection.py:26:17:26:28 | dict of etree.XML string | xsltInjection.py:26:17:26:43 | etree.XML string | +| xsltInjection.py:26:17:26:28 | dict of etree.XML string | xsltInjection.py:26:17:26:43 | etree.XML string | +| xsltInjection.py:26:17:26:43 | etree.XML string | xsltInjection.py:27:27:27:35 | etree.XML string | +| xsltInjection.py:26:17:26:43 | etree.XML string | xsltInjection.py:27:27:27:35 | etree.XML string | +| xsltInjection.py:27:17:27:36 | lxml etree xml | xsltInjection.py:31:24:31:32 | lxml etree xml | +| xsltInjection.py:27:17:27:36 | lxml etree xml | xsltInjection.py:31:24:31:32 | lxml etree xml | +| xsltInjection.py:27:27:27:35 | etree.XML string | xsltInjection.py:27:17:27:36 | lxml etree xml | +| xsltInjection.py:27:27:27:35 | etree.XML string | xsltInjection.py:27:17:27:36 | lxml etree xml | +| xsltInjection.py:35:17:35:28 | dict of etree.XML string | xsltInjection.py:35:17:35:43 | etree.XML string | +| xsltInjection.py:35:17:35:28 | dict of etree.XML string | xsltInjection.py:35:17:35:43 | etree.XML string | +| xsltInjection.py:35:17:35:43 | etree.XML string | xsltInjection.py:36:34:36:42 | etree.XML string | +| xsltInjection.py:35:17:35:43 | etree.XML string | xsltInjection.py:36:34:36:42 | etree.XML string | +| xsltInjection.py:36:17:36:43 | lxml etree xml | xsltInjection.py:40:24:40:32 | lxml etree xml | +| xsltInjection.py:36:17:36:43 | lxml etree xml | xsltInjection.py:40:24:40:32 | lxml etree xml | +| xsltInjection.py:36:34:36:42 | etree.XML string | xsltInjection.py:36:17:36:43 | lxml etree xml | +| xsltInjection.py:36:34:36:42 | etree.XML string | xsltInjection.py:36:17:36:43 | lxml etree xml | +#select +| xslt.py:14:29:14:37 | xslt_root | xslt.py:10:17:10:28 | dict of etree.XML string | xslt.py:14:29:14:37 | lxml etree xml | This XSLT query depends on $@. | xslt.py:10:17:10:28 | Attribute | a user-provided value | +| xsltInjection.py:12:28:12:36 | xslt_root | xsltInjection.py:10:17:10:28 | dict of etree.XML string | xsltInjection.py:12:28:12:36 | lxml etree xml | This XSLT query depends on $@. | xsltInjection.py:10:17:10:28 | Attribute | a user-provided value | +| xsltInjection.py:21:29:21:37 | xslt_root | xsltInjection.py:17:17:17:28 | dict of etree.XML string | xsltInjection.py:21:29:21:37 | lxml etree xml | This XSLT query depends on $@. | xsltInjection.py:17:17:17:28 | Attribute | a user-provided value | +| xsltInjection.py:31:24:31:32 | xslt_root | xsltInjection.py:26:17:26:28 | dict of etree.XML string | xsltInjection.py:31:24:31:32 | lxml etree xml | This XSLT query depends on $@. | xsltInjection.py:26:17:26:28 | Attribute | a user-provided value | +| xsltInjection.py:40:24:40:32 | xslt_root | xsltInjection.py:35:17:35:28 | dict of etree.XML string | xsltInjection.py:40:24:40:32 | lxml etree xml | This XSLT query depends on $@. | xsltInjection.py:35:17:35:28 | Attribute | a user-provided value | diff --git a/python/ql/test/experimental/CWE-091/Xslt.qlref b/python/ql/test/experimental/CWE-091/Xslt.qlref new file mode 100644 index 000000000000..32605307db89 --- /dev/null +++ b/python/ql/test/experimental/CWE-091/Xslt.qlref @@ -0,0 +1 @@ +experimental/CWE-643/Xslt.ql diff --git a/python/ql/test/experimental/CWE-091/XsltSinks.expected b/python/ql/test/experimental/CWE-091/XsltSinks.expected new file mode 100644 index 000000000000..7150b3046e28 --- /dev/null +++ b/python/ql/test/experimental/CWE-091/XsltSinks.expected @@ -0,0 +1,12 @@ +| xslt.py:14:29:14:37 | lxml.etree.parse.xslt | lxml etree xml | +| xsltInjection.py:12:28:12:36 | lxml.etree.XSLT | lxml etree xml | +| xsltInjection.py:21:29:21:37 | lxml.etree.parse.xslt | lxml etree xml | +| xsltInjection.py:31:24:31:32 | lxml.etree.parse.xslt | lxml etree xml | +| xsltInjection.py:40:24:40:32 | lxml.etree.parse.xslt | lxml etree xml | +| xsltInjection.py:50:24:50:32 | lxml.etree.parse.xslt | lxml etree xml | +| xsltInjection.py:60:24:60:32 | lxml.etree.parse.xslt | lxml etree xml | +| xsltInjection.py:69:24:69:32 | lxml.etree.parse.xslt | lxml etree xml | +| xsltInjection.py:79:24:79:32 | lxml.etree.parse.xslt | lxml etree xml | +| xsltSinks.py:17:28:17:36 | lxml.etree.XSLT | lxml etree xml | +| xsltSinks.py:30:29:30:37 | lxml.etree.parse.xslt | lxml etree xml | +| xsltSinks.py:44:24:44:32 | lxml.etree.parse.xslt | lxml etree xml | diff --git a/python/ql/test/experimental/CWE-091/XsltSinks.ql b/python/ql/test/experimental/CWE-091/XsltSinks.ql new file mode 100644 index 000000000000..2149e6921d0b --- /dev/null +++ b/python/ql/test/experimental/CWE-091/XsltSinks.ql @@ -0,0 +1,6 @@ +import python +import experimental.semmle.python.security.injection.XSLT + +from XSLTInjection::XSLTInjectionSink sink, TaintKind kind +where sink.sinks(kind) +select sink, kind diff --git a/python/ql/test/experimental/CWE-091/options b/python/ql/test/experimental/CWE-091/options new file mode 100644 index 000000000000..79d9dcd31b66 --- /dev/null +++ b/python/ql/test/experimental/CWE-091/options @@ -0,0 +1 @@ +semmle-extractor-options: -p ../../query-tests/Security/lib/ --max-import-depth=3 diff --git a/python/ql/test/experimental/CWE-091/xslt.py b/python/ql/test/experimental/CWE-091/xslt.py new file mode 100644 index 000000000000..1655916c7e06 --- /dev/null +++ b/python/ql/test/experimental/CWE-091/xslt.py @@ -0,0 +1,14 @@ +from lxml import etree +from io import StringIO +from flask import Flask, request + +app = Flask(__name__) + + +@app.route("/xslt") +def bad(): + xsltQuery = request.args.get('xml', '') + xslt_root = etree.XML(xsltQuery) + f = StringIO('') + tree = etree.parse(f) + result_tree = tree.xslt(xslt_root) # Not OK diff --git a/python/ql/test/experimental/CWE-091/xsltInjection.py b/python/ql/test/experimental/CWE-091/xsltInjection.py new file mode 100644 index 000000000000..ddab954bbff8 --- /dev/null +++ b/python/ql/test/experimental/CWE-091/xsltInjection.py @@ -0,0 +1,79 @@ +from lxml import etree +from io import StringIO +from flask import Flask, request + +app = Flask(__name__) + + +@app.route("/xslt1") +def a(): + xsltQuery = request.args.get('xml', '') + xslt_root = etree.XML(xsltQuery) + transform = etree.XSLT(xslt_root) # Not OK + + +@app.route("/xslt2") +def b(): + xsltQuery = request.args.get('xml', '') + xslt_root = etree.XML(xsltQuery) + f = StringIO('') + tree = etree.parse(f) + result_tree = tree.xslt(xslt_root) # Not OK + + +@app.route("/xslt3") +def c(): + xsltQuery = request.args.get('xml', '') + xslt_root = etree.XML(xsltQuery) + + f = StringIO('') + tree = etree.parse(f) + result = tree.xslt(xslt_root, a="'A'") # Not OK + +@app.route("/xslt4") +def d(): + xsltQuery = request.args.get('xml', '') + xslt_root = etree.fromstring(xsltQuery) + + f = StringIO('') + tree = etree.parse(f) + result = tree.xslt(xslt_root, a="'A'") # Not OK + +@app.route("/xslt5") +def e(): + xsltQuery = request.args.get('xml', '') + xsltStrings = [xsltQuery,"asd","random"] + xslt_root = etree.fromstringlist(xsltStrings) + + f = StringIO('') + tree = etree.parse(f) + result = tree.xslt(xslt_root, a="'A'") # Not OK + + +@app.route("/xslt6") +def f(): + xsltQuery = '' + xslt_root = etree.XML(xsltQuery) + + f = StringIO('') + tree = etree.parse(f) + result = tree.xslt(xslt_root, a="'A'") # OK + +@app.route("/xslt7") +def g(): + xsltQuery = '' + xslt_root = etree.fromstring(xsltQuery) + + f = StringIO('') + tree = etree.parse(f) + result = tree.xslt(xslt_root, a="'A'") # OK + +@app.route("/xslt8") +def h(): + xsltQuery = '' + xsltStrings = [xsltQuery,"asd","random"] + xslt_root = etree.fromstringlist(xsltStrings) + + f = StringIO('') + tree = etree.parse(f) + result = tree.xslt(xslt_root, a="'A'") # OK \ No newline at end of file diff --git a/python/ql/test/experimental/CWE-091/xsltSinks.py b/python/ql/test/experimental/CWE-091/xsltSinks.py new file mode 100644 index 000000000000..a82fc0c6c5fc --- /dev/null +++ b/python/ql/test/experimental/CWE-091/xsltSinks.py @@ -0,0 +1,56 @@ +from lxml import etree +from io import StringIO + +from django.urls import path +from django.http import HttpResponse +from django.template import Template, Context, Engine, engines + + +def a(request): + xslt_root = etree.XML('''\ + + + + + ''') + transform = etree.XSLT(xslt_root) + + +def b(request): + xslt_root = etree.XML('''\ + + + + + ''') + f = StringIO('') + tree = etree.parse(f) + result_tree = tree.xslt(xslt_root) + + +def c(request): + xslt_root = etree.XML('''\ + + + + + ''') + + f = StringIO('') + tree = etree.parse(f) + result = tree.xslt(xslt_root, a="'A'") + + +urlpatterns = [ + path('a', a), + path('b', b), + path('c', c) +] + +if __name__ == "__main__": + a(None) + b(None) + c(None) diff --git a/python/ql/test/query-tests/Security/lib/lxml/etree/__init__.py b/python/ql/test/query-tests/Security/lib/lxml/etree.py similarity index 100% rename from python/ql/test/query-tests/Security/lib/lxml/etree/__init__.py rename to python/ql/test/query-tests/Security/lib/lxml/etree.py