From fc8caa66c8ac4cb9b6a9a31f5ea24d0b50d3297a Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Fri, 23 Feb 2024 11:35:50 +0100 Subject: [PATCH 01/23] Python: Prepare for general content in type-tracker Due to the char-pred of Content, this change should keep exactly the same behavior as before. --- .../python/dataflow/new/TypeTracking.qll | 14 +++--- .../new/internal/TypeTrackingImpl.qll | 45 +++++-------------- 2 files changed, 21 insertions(+), 38 deletions(-) diff --git a/python/ql/lib/semmle/python/dataflow/new/TypeTracking.qll b/python/ql/lib/semmle/python/dataflow/new/TypeTracking.qll index 4f1810f059ef..9d0bcb3c487a 100644 --- a/python/ql/lib/semmle/python/dataflow/new/TypeTracking.qll +++ b/python/ql/lib/semmle/python/dataflow/new/TypeTracking.qll @@ -5,6 +5,7 @@ private import internal.TypeTrackingImpl as Impl import Impl::Shared::TypeTracking +private import semmle.python.dataflow.new.internal.DataFlowPublic as DataFlowPublic /** A string that may appear as the name of an attribute or access path. */ class AttributeName = Impl::TypeTrackingInput::Content; @@ -40,7 +41,11 @@ class TypeTracker extends Impl::TypeTracker { * Holds if this is the starting point of type tracking, and the value starts in the attribute named `attrName`. * The type tracking only ends after the attribute has been loaded. */ - predicate startInAttr(string attrName) { this.startInContent(attrName) } + predicate startInAttr(string attrName) { + exists(DataFlowPublic::AttributeContent content | content.getAttribute() = attrName | + this.startInContent(content) + ) + } /** * INTERNAL. DO NOT USE. @@ -48,9 +53,8 @@ class TypeTracker extends Impl::TypeTracker { * Gets the attribute associated with this type tracker. 
*/ string getAttr() { - result = this.getContent().asSome() - or - this.getContent().isNone() and - result = "" + if this.getContent().asSome() instanceof DataFlowPublic::AttributeContent + then result = this.getContent().asSome().(DataFlowPublic::AttributeContent).getAttribute() + else result = "" } } diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackingImpl.qll b/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackingImpl.qll index 1a9bdb5202ee..8b6e53c8b745 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackingImpl.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackingImpl.qll @@ -97,24 +97,14 @@ private module SummaryTypeTrackerInput implements SummaryTypeTracker::Input { private module TypeTrackerSummaryFlow = SummaryTypeTracker::SummaryFlow; -/** - * Gets the name of a possible piece of content. For Python, this is currently only attribute names, - * using the name of the attribute for the corresponding content. - */ -private string getPossibleContentName() { - Stages::TypeTracking::ref() and // the TypeTracking::append() etc. predicates that we want to cache depend on this predicate, so we can place the `ref()` call here to get around identical files. - result = any(DataFlowPublic::AttrRef a).getAttributeName() -} - module TypeTrackingInput implements Shared::TypeTrackingInput { class Node = DataFlowPublic::Node; class LocalSourceNode = DataFlowPublic::LocalSourceNode; - class Content instanceof string { - Content() { this = getPossibleContentName() } - - string toString() { result = this } + class Content extends DataFlowPublic::Content { + // this char-pred is just a temporary restriction while transitioning to more general content + Content() { this instanceof DataFlowPublic::AttributeContent } } /** @@ -181,46 +171,35 @@ module TypeTrackingInput implements Shared::TypeTrackingInput { * Holds if `nodeFrom` is being written to the `content` content of the object in `nodeTo`. 
*/ predicate storeStep(Node nodeFrom, Node nodeTo, Content content) { - exists(DataFlowPublic::AttrWrite a | - a.mayHaveAttributeName(content) and + exists(DataFlowPublic::AttrWrite a, string attrName | + content.(DataFlowPublic::AttributeContent).getAttribute() = attrName and + a.mayHaveAttributeName(attrName) and nodeFrom = a.getValue() and nodeTo = a.getObject() ) or - exists(DataFlowPublic::ContentSet contents | - contents.(DataFlowPublic::AttributeContent).getAttribute() = content - | - TypeTrackerSummaryFlow::basicStoreStep(nodeFrom, nodeTo, contents) - ) + TypeTrackerSummaryFlow::basicStoreStep(nodeFrom, nodeTo, content) } /** * Holds if `nodeTo` is the result of accessing the `content` content of `nodeFrom`. */ predicate loadStep(Node nodeFrom, LocalSourceNode nodeTo, Content content) { - exists(DataFlowPublic::AttrRead a | - a.mayHaveAttributeName(content) and + exists(DataFlowPublic::AttrRead a, string attrName | + content.(DataFlowPublic::AttributeContent).getAttribute() = attrName and + a.mayHaveAttributeName(attrName) and nodeFrom = a.getObject() and nodeTo = a ) or - exists(DataFlowPublic::ContentSet contents | - contents.(DataFlowPublic::AttributeContent).getAttribute() = content - | - TypeTrackerSummaryFlow::basicLoadStep(nodeFrom, nodeTo, contents) - ) + TypeTrackerSummaryFlow::basicLoadStep(nodeFrom, nodeTo, content) } /** * Holds if the `loadContent` of `nodeFrom` is stored in the `storeContent` of `nodeTo`. 
*/ predicate loadStoreStep(Node nodeFrom, Node nodeTo, Content loadContent, Content storeContent) { - exists(DataFlowPublic::ContentSet loadContents, DataFlowPublic::ContentSet storeContents | - loadContents.(DataFlowPublic::AttributeContent).getAttribute() = loadContent and - storeContents.(DataFlowPublic::AttributeContent).getAttribute() = storeContent - | - TypeTrackerSummaryFlow::basicLoadStoreStep(nodeFrom, nodeTo, loadContents, storeContents) - ) + TypeTrackerSummaryFlow::basicLoadStoreStep(nodeFrom, nodeTo, loadContent, storeContent) } /** From 636cf611ae8d5f9ac3025bacff6e028902e89f88 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Fri, 23 Feb 2024 11:36:47 +0100 Subject: [PATCH 02/23] Python: Allow general content in type-tracker This should not result in many changes, since store/load steps are still only implemented for attributes. --- .../semmle/python/dataflow/new/internal/TypeTrackingImpl.qll | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackingImpl.qll b/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackingImpl.qll index 8b6e53c8b745..81c1f369561b 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackingImpl.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackingImpl.qll @@ -102,10 +102,7 @@ module TypeTrackingInput implements Shared::TypeTrackingInput { class LocalSourceNode = DataFlowPublic::LocalSourceNode; - class Content extends DataFlowPublic::Content { - // this char-pred is just a temporary restriction while transitioning to more general content - Content() { this instanceof DataFlowPublic::AttributeContent } - } + class Content = DataFlowPublic::Content; /** * A label to use for `WithContent` and `WithoutContent` steps, restricting From 7721fb33314cee322fd4b7ac3c8bd5bfe8b2e353 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Fri, 23 Feb 2024 11:44:08 +0100 Subject: [PATCH 03/23] Python: Setup shared 
read/store steps --- .../dataflow/new/internal/DataFlowPrivate.qll | 14 ++++++++++++++ .../dataflow/new/internal/TypeTrackingImpl.qll | 4 ++++ 2 files changed, 18 insertions(+) diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll index 47f41d0cd057..5ccfa251634a 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll @@ -641,11 +641,18 @@ predicate jumpStepNotSharedWithTypeTracker(Node nodeFrom, Node nodeTo) { //-------- // Field flow //-------- +/** + * Subset of `storeStep` that should be shared with type-tracking. + */ +predicate storeStepCommon(Node nodeFrom, ContentSet c, Node nodeTo) { none() } + /** * Holds if data can flow from `nodeFrom` to `nodeTo` via an assignment to * content `c`. */ predicate storeStep(Node nodeFrom, ContentSet c, Node nodeTo) { + storeStepCommon(nodeFrom, c, nodeTo) + or listStoreStep(nodeFrom, c, nodeTo) or setStoreStep(nodeFrom, c, nodeTo) @@ -891,10 +898,17 @@ predicate attributeStoreStep(Node nodeFrom, AttributeContent c, Node nodeTo) { ) } +/** + * Subset of `readStep` that should be shared with type-tracking. + */ +predicate readStepCommon(Node nodeFrom, ContentSet c, Node nodeTo) { none() } + /** * Holds if data can flow from `nodeFrom` to `nodeTo` via a read of content `c`. 
*/ predicate readStep(Node nodeFrom, ContentSet c, Node nodeTo) { + readStepCommon(nodeFrom, c, nodeTo) + or subscriptReadStep(nodeFrom, c, nodeTo) or iterableUnpackingReadStep(nodeFrom, c, nodeTo) diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackingImpl.qll b/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackingImpl.qll index 81c1f369561b..68779208de97 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackingImpl.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackingImpl.qll @@ -175,6 +175,8 @@ module TypeTrackingInput implements Shared::TypeTrackingInput { nodeTo = a.getObject() ) or + DataFlowPrivate::storeStepCommon(nodeFrom, content, nodeTo) + or TypeTrackerSummaryFlow::basicStoreStep(nodeFrom, nodeTo, content) } @@ -189,6 +191,8 @@ module TypeTrackingInput implements Shared::TypeTrackingInput { nodeTo = a ) or + DataFlowPrivate::readStepCommon(nodeFrom, content, nodeTo) + or TypeTrackerSummaryFlow::basicLoadStep(nodeFrom, nodeTo, content) } From a95bb7c86b1b9047fd5bcc6adf77ee195074a8c5 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Fri, 23 Feb 2024 13:24:48 +0100 Subject: [PATCH 04/23] Python: Expand function reference in content test --- .../CallGraph/InlineCallGraphTest.expected | 3 +- .../CallGraph/code/func_ref_in_content.py | 53 +++++++++++++++++++ .../CallGraph/code/tuple_function_return.py | 15 ------ 3 files changed, 55 insertions(+), 16 deletions(-) create mode 100644 python/ql/test/experimental/library-tests/CallGraph/code/func_ref_in_content.py delete mode 100644 python/ql/test/experimental/library-tests/CallGraph/code/tuple_function_return.py diff --git a/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.expected b/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.expected index 55774486be0d..504c5251a1a8 100644 --- a/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.expected +++ 
b/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.expected @@ -15,8 +15,9 @@ pointsTo_found_typeTracker_notFound | code/func_defined_outside_class.py:39:11:39:21 | ControlFlowNode for _gen() | B._gen | | code/func_defined_outside_class.py:42:1:42:7 | ControlFlowNode for Attribute() | B._gen.func | | code/func_defined_outside_class.py:43:1:43:7 | ControlFlowNode for Attribute() | B._gen.func | +| code/func_ref_in_content.py:17:1:17:4 | ControlFlowNode for f2() | func | +| code/func_ref_in_content.py:20:1:20:4 | ControlFlowNode for f3() | func | | code/funky_regression.py:15:9:15:17 | ControlFlowNode for Attribute() | Wat.f2 | -| code/tuple_function_return.py:15:1:15:4 | ControlFlowNode for f2() | func | | code/type_tracking_limitation.py:8:1:8:3 | ControlFlowNode for x() | my_func | typeTracker_found_pointsTo_notFound | code/callable_as_argument.py:29:5:29:12 | ControlFlowNode for Attribute() | test_class.InsideTestFunc.sm | diff --git a/python/ql/test/experimental/library-tests/CallGraph/code/func_ref_in_content.py b/python/ql/test/experimental/library-tests/CallGraph/code/func_ref_in_content.py new file mode 100644 index 000000000000..b89a013f5b1b --- /dev/null +++ b/python/ql/test/experimental/library-tests/CallGraph/code/func_ref_in_content.py @@ -0,0 +1,53 @@ +def func(): + print("func()") + +def return_func(): + return func + +f1 = return_func() # $ pt,tt=return_func +f1() # $ pt,tt=func + + +def return_func_in_tuple(): + return (func, 42) + +tup = return_func_in_tuple() # $ pt,tt=return_func_in_tuple + +f2, _ = tup +f2() # $ pt=func MISSING: tt + +f3 = tup[0] +f3() # $ pt=func MISSING: tt + + +def return_func_in_dict(): + return {'func': func, 'val': 42} + +dct = return_func_in_dict() # $ pt,tt=return_func_in_dict + +f4 = dct['func'] +f4() # $ MISSING: tt=func + + +def return_func_in_dict_update(): + d = {} + d["func"] = func + return d + +dct2 = return_func_in_dict_update() # $ pt,tt=return_func_in_dict_update + +f5 = dct2['func'] 
+f5() # $ MISSING: tt=func + + +def return_func_in_list(): + return [func, 42] + +lst = return_func_in_list() # $ pt,tt=return_func_in_list + +f6 = lst[0] +f6() # $ MISSING: pt,tt=func + +if eval("False"): # don't run this, but fool analysis to still consider it (doesn't work if you just do `if False:`) + f7 = lst[1] + f7() diff --git a/python/ql/test/experimental/library-tests/CallGraph/code/tuple_function_return.py b/python/ql/test/experimental/library-tests/CallGraph/code/tuple_function_return.py deleted file mode 100644 index f87b1aa23e84..000000000000 --- a/python/ql/test/experimental/library-tests/CallGraph/code/tuple_function_return.py +++ /dev/null @@ -1,15 +0,0 @@ -def func(): - print("func()") - -def return_func(): - return func - -def return_func_in_tuple(): - return (func, 42) - -f1 = return_func() # $ pt,tt=return_func -f1() # $ pt,tt=func - - -f2, _ = return_func_in_tuple() # $ pt,tt=return_func_in_tuple -f2() # $ pt=func MISSING: tt From ece8245a4be1f05fffc2e9cd7acd5e387729538e Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Fri, 23 Feb 2024 13:39:49 +0100 Subject: [PATCH 05/23] Python: type-track through tuple content --- .../python/dataflow/new/internal/DataFlowPrivate.qll | 12 ++++++------ .../CallGraph/InlineCallGraphTest.expected | 1 - .../CallGraph/code/func_ref_in_content.py | 2 +- 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll index 5ccfa251634a..22fb979f9dcb 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll @@ -644,7 +644,9 @@ predicate jumpStepNotSharedWithTypeTracker(Node nodeFrom, Node nodeTo) { /** * Subset of `storeStep` that should be shared with type-tracking. 
*/ -predicate storeStepCommon(Node nodeFrom, ContentSet c, Node nodeTo) { none() } +predicate storeStepCommon(Node nodeFrom, ContentSet c, Node nodeTo) { + tupleStoreStep(nodeFrom, c, nodeTo) +} /** * Holds if data can flow from `nodeFrom` to `nodeTo` via an assignment to @@ -657,8 +659,6 @@ predicate storeStep(Node nodeFrom, ContentSet c, Node nodeTo) { or setStoreStep(nodeFrom, c, nodeTo) or - tupleStoreStep(nodeFrom, c, nodeTo) - or dictStoreStep(nodeFrom, c, nodeTo) or moreDictStoreSteps(nodeFrom, c, nodeTo) @@ -901,7 +901,9 @@ predicate attributeStoreStep(Node nodeFrom, AttributeContent c, Node nodeTo) { /** * Subset of `readStep` that should be shared with type-tracking. */ -predicate readStepCommon(Node nodeFrom, ContentSet c, Node nodeTo) { none() } +predicate readStepCommon(Node nodeFrom, ContentSet c, Node nodeTo) { + subscriptReadStep(nodeFrom, c, nodeTo) +} /** * Holds if data can flow from `nodeFrom` to `nodeTo` via a read of content `c`. @@ -909,8 +911,6 @@ predicate readStepCommon(Node nodeFrom, ContentSet c, Node nodeTo) { none() } predicate readStep(Node nodeFrom, ContentSet c, Node nodeTo) { readStepCommon(nodeFrom, c, nodeTo) or - subscriptReadStep(nodeFrom, c, nodeTo) - or iterableUnpackingReadStep(nodeFrom, c, nodeTo) or matchReadStep(nodeFrom, c, nodeTo) diff --git a/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.expected b/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.expected index 504c5251a1a8..667ebf28d755 100644 --- a/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.expected +++ b/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.expected @@ -16,7 +16,6 @@ pointsTo_found_typeTracker_notFound | code/func_defined_outside_class.py:42:1:42:7 | ControlFlowNode for Attribute() | B._gen.func | | code/func_defined_outside_class.py:43:1:43:7 | ControlFlowNode for Attribute() | B._gen.func | | code/func_ref_in_content.py:17:1:17:4 | ControlFlowNode for 
f2() | func | -| code/func_ref_in_content.py:20:1:20:4 | ControlFlowNode for f3() | func | | code/funky_regression.py:15:9:15:17 | ControlFlowNode for Attribute() | Wat.f2 | | code/type_tracking_limitation.py:8:1:8:3 | ControlFlowNode for x() | my_func | typeTracker_found_pointsTo_notFound diff --git a/python/ql/test/experimental/library-tests/CallGraph/code/func_ref_in_content.py b/python/ql/test/experimental/library-tests/CallGraph/code/func_ref_in_content.py index b89a013f5b1b..87abb4198e9c 100644 --- a/python/ql/test/experimental/library-tests/CallGraph/code/func_ref_in_content.py +++ b/python/ql/test/experimental/library-tests/CallGraph/code/func_ref_in_content.py @@ -17,7 +17,7 @@ def return_func_in_tuple(): f2() # $ pt=func MISSING: tt f3 = tup[0] -f3() # $ pt=func MISSING: tt +f3() # $ tt,pt=func def return_func_in_dict(): From 73fe596753357032044842f90816421ddd16490c Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Fri, 23 Feb 2024 13:57:57 +0100 Subject: [PATCH 06/23] Python: type-tracking through dictionary construction --- .../semmle/python/dataflow/new/internal/DataFlowPrivate.qll | 4 ++-- .../library-tests/CallGraph/InlineCallGraphTest.expected | 1 + .../library-tests/CallGraph/code/func_ref_in_content.py | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll index 22fb979f9dcb..3b589da37d6d 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll @@ -646,6 +646,8 @@ predicate jumpStepNotSharedWithTypeTracker(Node nodeFrom, Node nodeTo) { */ predicate storeStepCommon(Node nodeFrom, ContentSet c, Node nodeTo) { tupleStoreStep(nodeFrom, c, nodeTo) + or + dictStoreStep(nodeFrom, c, nodeTo) } /** @@ -659,8 +661,6 @@ predicate storeStep(Node nodeFrom, ContentSet c, Node nodeTo) { or 
setStoreStep(nodeFrom, c, nodeTo) or - dictStoreStep(nodeFrom, c, nodeTo) - or moreDictStoreSteps(nodeFrom, c, nodeTo) or comprehensionStoreStep(nodeFrom, c, nodeTo) diff --git a/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.expected b/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.expected index 667ebf28d755..ab97d594a4e0 100644 --- a/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.expected +++ b/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.expected @@ -38,6 +38,7 @@ typeTracker_found_pointsTo_notFound | code/class_super.py:101:1:101:7 | ControlFlowNode for Attribute() | Z.foo | | code/class_super.py:108:1:108:8 | ControlFlowNode for Attribute() | Z.foo | | code/def_in_function.py:22:5:22:11 | ControlFlowNode for Attribute() | test.A.foo | +| code/func_ref_in_content.py:29:1:29:4 | ControlFlowNode for f4() | func | | code/isinstance.py:9:13:9:22 | ControlFlowNode for Attribute() | A.foo | | code/isinstance.py:9:13:9:22 | ControlFlowNode for Attribute() | ASub.foo | | code/isinstance.py:14:13:14:22 | ControlFlowNode for Attribute() | A.foo | diff --git a/python/ql/test/experimental/library-tests/CallGraph/code/func_ref_in_content.py b/python/ql/test/experimental/library-tests/CallGraph/code/func_ref_in_content.py index 87abb4198e9c..57b11915c51b 100644 --- a/python/ql/test/experimental/library-tests/CallGraph/code/func_ref_in_content.py +++ b/python/ql/test/experimental/library-tests/CallGraph/code/func_ref_in_content.py @@ -26,7 +26,7 @@ def return_func_in_dict(): dct = return_func_in_dict() # $ pt,tt=return_func_in_dict f4 = dct['func'] -f4() # $ MISSING: tt=func +f4() # $ tt=func def return_func_in_dict_update(): From dac2b57bb029e345f79e0337f5ab1ba2a2137db7 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Fri, 23 Feb 2024 14:51:38 +0100 Subject: [PATCH 07/23] Python: type-track through dict-updates --- .../dataflow/new/internal/DataFlowPrivate.qll | 4 
++-- .../dataflow/new/internal/TypeTrackingImpl.qll | 13 ++++++++++++- .../CallGraph/InlineCallGraphTest.expected | 1 + .../CallGraph/code/func_ref_in_content.py | 2 +- 4 files changed, 16 insertions(+), 4 deletions(-) diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll index 3b589da37d6d..98841726a746 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll @@ -648,6 +648,8 @@ predicate storeStepCommon(Node nodeFrom, ContentSet c, Node nodeTo) { tupleStoreStep(nodeFrom, c, nodeTo) or dictStoreStep(nodeFrom, c, nodeTo) + or + moreDictStoreSteps(nodeFrom, c, nodeTo) } /** @@ -661,8 +663,6 @@ predicate storeStep(Node nodeFrom, ContentSet c, Node nodeTo) { or setStoreStep(nodeFrom, c, nodeTo) or - moreDictStoreSteps(nodeFrom, c, nodeTo) - or comprehensionStoreStep(nodeFrom, c, nodeTo) or iterableUnpackingStoreStep(nodeFrom, c, nodeTo) diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackingImpl.qll b/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackingImpl.qll index 68779208de97..8b3e1a95ef10 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackingImpl.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackingImpl.qll @@ -175,7 +175,18 @@ module TypeTrackingInput implements Shared::TypeTrackingInput { nodeTo = a.getObject() ) or - DataFlowPrivate::storeStepCommon(nodeFrom, content, nodeTo) + // type-tracking doesn't really handle PostUpdateNodes, so for some assignment steps + // like `my_dict["foo"] = foo` the data-flow step targets the PostUpdateNode for + // `my_dict`, where we want to translate that into a type-tracking step that targets + // the normal/non-PostUpdateNode for `my_dict`. 
+ exists(DataFlowPublic::Node storeTarget | + DataFlowPrivate::storeStepCommon(nodeFrom, content, storeTarget) + | + not storeTarget instanceof DataFlowPrivate::SyntheticPostUpdateNode and + nodeTo = storeTarget + or + nodeTo = storeTarget.(DataFlowPrivate::SyntheticPostUpdateNode).getPreUpdateNode() + ) or TypeTrackerSummaryFlow::basicStoreStep(nodeFrom, nodeTo, content) } diff --git a/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.expected b/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.expected index ab97d594a4e0..378b2c64957c 100644 --- a/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.expected +++ b/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.expected @@ -39,6 +39,7 @@ typeTracker_found_pointsTo_notFound | code/class_super.py:108:1:108:8 | ControlFlowNode for Attribute() | Z.foo | | code/def_in_function.py:22:5:22:11 | ControlFlowNode for Attribute() | test.A.foo | | code/func_ref_in_content.py:29:1:29:4 | ControlFlowNode for f4() | func | +| code/func_ref_in_content.py:40:1:40:4 | ControlFlowNode for f5() | func | | code/isinstance.py:9:13:9:22 | ControlFlowNode for Attribute() | A.foo | | code/isinstance.py:9:13:9:22 | ControlFlowNode for Attribute() | ASub.foo | | code/isinstance.py:14:13:14:22 | ControlFlowNode for Attribute() | A.foo | diff --git a/python/ql/test/experimental/library-tests/CallGraph/code/func_ref_in_content.py b/python/ql/test/experimental/library-tests/CallGraph/code/func_ref_in_content.py index 57b11915c51b..4bea545cb0f2 100644 --- a/python/ql/test/experimental/library-tests/CallGraph/code/func_ref_in_content.py +++ b/python/ql/test/experimental/library-tests/CallGraph/code/func_ref_in_content.py @@ -37,7 +37,7 @@ def return_func_in_dict_update(): dct2 = return_func_in_dict_update() # $ pt,tt=return_func_in_dict_update f5 = dct2['func'] -f5() # $ MISSING: tt=func +f5() # $ tt=func def return_func_in_list(): From 
0cf3fe4a4c5d7772d2efefa0d45cf2d89e46c160 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Fri, 23 Feb 2024 14:59:07 +0100 Subject: [PATCH 08/23] Python: Expand dict update tests --- .../CallGraph/InlineCallGraphTest.expected | 8 +++++--- .../CallGraph/code/func_ref_in_content.py | 10 ++++++++++ 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.expected b/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.expected index 378b2c64957c..c5b7d6dc473c 100644 --- a/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.expected +++ b/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.expected @@ -15,7 +15,7 @@ pointsTo_found_typeTracker_notFound | code/func_defined_outside_class.py:39:11:39:21 | ControlFlowNode for _gen() | B._gen | | code/func_defined_outside_class.py:42:1:42:7 | ControlFlowNode for Attribute() | B._gen.func | | code/func_defined_outside_class.py:43:1:43:7 | ControlFlowNode for Attribute() | B._gen.func | -| code/func_ref_in_content.py:17:1:17:4 | ControlFlowNode for f2() | func | +| code/func_ref_in_content.py:20:1:20:4 | ControlFlowNode for f2() | func | | code/funky_regression.py:15:9:15:17 | ControlFlowNode for Attribute() | Wat.f2 | | code/type_tracking_limitation.py:8:1:8:3 | ControlFlowNode for x() | my_func | typeTracker_found_pointsTo_notFound @@ -38,8 +38,10 @@ typeTracker_found_pointsTo_notFound | code/class_super.py:101:1:101:7 | ControlFlowNode for Attribute() | Z.foo | | code/class_super.py:108:1:108:8 | ControlFlowNode for Attribute() | Z.foo | | code/def_in_function.py:22:5:22:11 | ControlFlowNode for Attribute() | test.A.foo | -| code/func_ref_in_content.py:29:1:29:4 | ControlFlowNode for f4() | func | -| code/func_ref_in_content.py:40:1:40:4 | ControlFlowNode for f5() | func | +| code/func_ref_in_content.py:32:1:32:4 | ControlFlowNode for f4() | func | +| 
code/func_ref_in_content.py:46:1:46:4 | ControlFlowNode for f5() | func | +| code/func_ref_in_content.py:48:1:48:15 | ControlFlowNode for Subscript() | func2 | +| code/func_ref_in_content.py:50:1:50:19 | ControlFlowNode for Subscript() | func2 | | code/isinstance.py:9:13:9:22 | ControlFlowNode for Attribute() | A.foo | | code/isinstance.py:9:13:9:22 | ControlFlowNode for Attribute() | ASub.foo | | code/isinstance.py:14:13:14:22 | ControlFlowNode for Attribute() | A.foo | diff --git a/python/ql/test/experimental/library-tests/CallGraph/code/func_ref_in_content.py b/python/ql/test/experimental/library-tests/CallGraph/code/func_ref_in_content.py index 4bea545cb0f2..b249ec0b2e98 100644 --- a/python/ql/test/experimental/library-tests/CallGraph/code/func_ref_in_content.py +++ b/python/ql/test/experimental/library-tests/CallGraph/code/func_ref_in_content.py @@ -1,6 +1,9 @@ def func(): print("func()") +def func2(): + print("func2()") + def return_func(): return func @@ -32,6 +35,9 @@ def return_func_in_dict(): def return_func_in_dict_update(): d = {} d["func"] = func + d["func2"] = func2 + d["contested"] = func + d["contested"] = func2 return d dct2 = return_func_in_dict_update() # $ pt,tt=return_func_in_dict_update @@ -39,6 +45,10 @@ def return_func_in_dict_update(): f5 = dct2['func'] f5() # $ tt=func +dct2['func2']() # $ tt=func2 + +dct2['contested']() # $ tt=func2 SPURIOUS: tt=func + def return_func_in_list(): return [func, 42] From 92729dbbd659bdac0c8f9e31699161806fb80e73 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Fri, 23 Feb 2024 15:27:10 +0100 Subject: [PATCH 09/23] Python: Support iterable unpacking in type-tracking --- .../python/dataflow/new/internal/DataFlowPrivate.qll | 8 ++++---- .../library-tests/CallGraph/InlineCallGraphTest.expected | 1 - .../library-tests/CallGraph/code/func_ref_in_content.py | 2 +- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll 
b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll index 98841726a746..f2a523775443 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll @@ -650,6 +650,8 @@ predicate storeStepCommon(Node nodeFrom, ContentSet c, Node nodeTo) { dictStoreStep(nodeFrom, c, nodeTo) or moreDictStoreSteps(nodeFrom, c, nodeTo) + or + iterableUnpackingStoreStep(nodeFrom, c, nodeTo) } /** @@ -665,8 +667,6 @@ predicate storeStep(Node nodeFrom, ContentSet c, Node nodeTo) { or comprehensionStoreStep(nodeFrom, c, nodeTo) or - iterableUnpackingStoreStep(nodeFrom, c, nodeTo) - or attributeStoreStep(nodeFrom, c, nodeTo) or matchStoreStep(nodeFrom, c, nodeTo) @@ -903,6 +903,8 @@ predicate attributeStoreStep(Node nodeFrom, AttributeContent c, Node nodeTo) { */ predicate readStepCommon(Node nodeFrom, ContentSet c, Node nodeTo) { subscriptReadStep(nodeFrom, c, nodeTo) + or + iterableUnpackingReadStep(nodeFrom, c, nodeTo) } /** @@ -911,8 +913,6 @@ predicate readStepCommon(Node nodeFrom, ContentSet c, Node nodeTo) { predicate readStep(Node nodeFrom, ContentSet c, Node nodeTo) { readStepCommon(nodeFrom, c, nodeTo) or - iterableUnpackingReadStep(nodeFrom, c, nodeTo) - or matchReadStep(nodeFrom, c, nodeTo) or forReadStep(nodeFrom, c, nodeTo) diff --git a/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.expected b/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.expected index c5b7d6dc473c..ef82a9ad20c4 100644 --- a/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.expected +++ b/python/ql/test/experimental/library-tests/CallGraph/InlineCallGraphTest.expected @@ -15,7 +15,6 @@ pointsTo_found_typeTracker_notFound | code/func_defined_outside_class.py:39:11:39:21 | ControlFlowNode for _gen() | B._gen | | code/func_defined_outside_class.py:42:1:42:7 | ControlFlowNode for Attribute() | B._gen.func | | 
code/func_defined_outside_class.py:43:1:43:7 | ControlFlowNode for Attribute() | B._gen.func | -| code/func_ref_in_content.py:20:1:20:4 | ControlFlowNode for f2() | func | | code/funky_regression.py:15:9:15:17 | ControlFlowNode for Attribute() | Wat.f2 | | code/type_tracking_limitation.py:8:1:8:3 | ControlFlowNode for x() | my_func | typeTracker_found_pointsTo_notFound diff --git a/python/ql/test/experimental/library-tests/CallGraph/code/func_ref_in_content.py b/python/ql/test/experimental/library-tests/CallGraph/code/func_ref_in_content.py index b249ec0b2e98..24518ace0885 100644 --- a/python/ql/test/experimental/library-tests/CallGraph/code/func_ref_in_content.py +++ b/python/ql/test/experimental/library-tests/CallGraph/code/func_ref_in_content.py @@ -17,7 +17,7 @@ def return_func_in_tuple(): tup = return_func_in_tuple() # $ pt,tt=return_func_in_tuple f2, _ = tup -f2() # $ pt=func MISSING: tt +f2() # $ pt,tt=func f3 = tup[0] f3() # $ tt,pt=func From 8a7ffac19c9162fdfbde38aa859fc114ad927ba6 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Fri, 1 Mar 2024 12:13:43 +0100 Subject: [PATCH 10/23] Python: Accept consistency failure --- .../CallGraph/CONSISTENCY/TypeTrackingConsistency.expected | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 python/ql/test/experimental/library-tests/CallGraph/CONSISTENCY/TypeTrackingConsistency.expected diff --git a/python/ql/test/experimental/library-tests/CallGraph/CONSISTENCY/TypeTrackingConsistency.expected b/python/ql/test/experimental/library-tests/CallGraph/CONSISTENCY/TypeTrackingConsistency.expected new file mode 100644 index 000000000000..6aed7c838132 --- /dev/null +++ b/python/ql/test/experimental/library-tests/CallGraph/CONSISTENCY/TypeTrackingConsistency.expected @@ -0,0 +1,2 @@ +| code/func_ref_in_content.py:19:1:19:5 | IterableElement | Unreachable node in step of kind store Tuple element at index 0. 
| +| code/func_ref_in_content.py:19:1:19:5 | IterableElement | Unreachable node in step of kind store Tuple element at index 1. | From 4d78762ba85ddb1373488ef1b0f45fc2a9f9b2a3 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Fri, 1 Mar 2024 12:14:14 +0100 Subject: [PATCH 11/23] Python: Ignore consistency failure --- python/ql/consistency-queries/TypeTrackingConsistency.ql | 7 +++++++ .../CallGraph/CONSISTENCY/TypeTrackingConsistency.expected | 2 -- 2 files changed, 7 insertions(+), 2 deletions(-) delete mode 100644 python/ql/test/experimental/library-tests/CallGraph/CONSISTENCY/TypeTrackingConsistency.expected diff --git a/python/ql/consistency-queries/TypeTrackingConsistency.ql b/python/ql/consistency-queries/TypeTrackingConsistency.ql index 150832290020..551573a7aef7 100644 --- a/python/ql/consistency-queries/TypeTrackingConsistency.ql +++ b/python/ql/consistency-queries/TypeTrackingConsistency.ql @@ -36,6 +36,13 @@ private module ConsistencyChecksInput implements ConsistencyChecksInputSig { // which I couldn't just fix. We ignore the problems here, and instead rely on the // test-case added in https://github.com/github/codeql/pull/15841 n.getLocation().getFile().getAbsolutePath().matches("%/socketserver.py") + or + // for iterable unpacking like `a,b = some_list`, we currently don't want to allow + // type-tracking... however, in the future when we allow tracking list indexes + // precisely (that is, move away from ListElementContent), we should ensure we have + // proper flow to the synthetic `IterableElementNode`. 
+ exists(DataFlow::ListElementContent c) and + n instanceof DataFlow::IterableElementNode } } diff --git a/python/ql/test/experimental/library-tests/CallGraph/CONSISTENCY/TypeTrackingConsistency.expected b/python/ql/test/experimental/library-tests/CallGraph/CONSISTENCY/TypeTrackingConsistency.expected deleted file mode 100644 index 6aed7c838132..000000000000 --- a/python/ql/test/experimental/library-tests/CallGraph/CONSISTENCY/TypeTrackingConsistency.expected +++ /dev/null @@ -1,2 +0,0 @@ -| code/func_ref_in_content.py:19:1:19:5 | IterableElement | Unreachable node in step of kind store Tuple element at index 0. | -| code/func_ref_in_content.py:19:1:19:5 | IterableElement | Unreachable node in step of kind store Tuple element at index 1. | From fa0c4e18fcbe2638af0266bbd86d209e2feec054 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Mon, 4 Mar 2024 16:05:18 +0100 Subject: [PATCH 12/23] Python: Expand dict-content tt test even more While it might be useful to track content to any lookup, it's not something we do right now. 
--- .../CallGraph/code/func_ref_in_content.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/python/ql/test/experimental/library-tests/CallGraph/code/func_ref_in_content.py b/python/ql/test/experimental/library-tests/CallGraph/code/func_ref_in_content.py index 24518ace0885..eee8f29778be 100644 --- a/python/ql/test/experimental/library-tests/CallGraph/code/func_ref_in_content.py +++ b/python/ql/test/experimental/library-tests/CallGraph/code/func_ref_in_content.py @@ -50,6 +50,17 @@ def return_func_in_dict_update(): dct2['contested']() # $ tt=func2 SPURIOUS: tt=func +## non-precise access is not supported right now +for k in dct2: + dct2[k]() # $ MISSING: tt=func tt=func2 + +for v in dct2.values(): + v() # $ MISSING: tt=func tt=func2 + +for k, v in dct2.items(): + v() # $ MISSING: tt=func tt=func2 + + def return_func_in_list(): return [func, 42] From 7de304bf1680d61f7b6309cb840a941445f9caf9 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Tue, 12 Mar 2024 16:07:53 +0100 Subject: [PATCH 13/23] Python: Add proper type-tracking tests for content Instead of just relying on the call-graph tests --- .../dataflow/typetracking/content_test.py | 78 +++++++++++++++++++ .../dataflow/typetracking/tracked.ql | 8 ++ 2 files changed, 86 insertions(+) create mode 100644 python/ql/test/experimental/dataflow/typetracking/content_test.py diff --git a/python/ql/test/experimental/dataflow/typetracking/content_test.py b/python/ql/test/experimental/dataflow/typetracking/content_test.py new file mode 100644 index 000000000000..ac201f233013 --- /dev/null +++ b/python/ql/test/experimental/dataflow/typetracking/content_test.py @@ -0,0 +1,78 @@ +# test of other content types than attributes + +def test_tuple(index_arg): + tup = (tracked, other) # $tracked + + tup[0] # $ tracked + tup[1] + + a,b = tup # $tracked + a # $ tracked + b + + # non-precise access is not supported right now (and it's not 100% clear if we want + # to support it, or if it will lead to bad results) 
+ tup[index_arg] + + for x in tup: + print(x) + + for i in range(len(tup)): + print(tup[i]) + + +def test_dict(key_arg): + d1 = {"t": tracked, "o": other} # $tracked + d1["t"] # $ tracked + d1.get("t") # $ MISSING: tracked + d1.setdefault("t") # $ MISSING: tracked + + d1["o"] + d1.get("o") + d1.setdefault("o") + + + # non-precise access is not supported right now (and it's not 100% clear if we want + # to support it, or if it will lead to bad results) + d1[key_arg] + + for k in d1: + d1[k] + + for v in d1.values(): + v + + for k, v in d1.items(): + v + + + # construction with inline updates + d2 = dict() + d2["t"] = tracked # $ tracked + d2["o"] = other + + d2["t"] # $ tracked + d2["o"] + + # notice that time-travel is also possible (just as with attributes) + d3 = dict() + d3["t"] # $ SPURIOUS: tracked + d3["t"] = tracked # $ tracked + d3["t"] # $ tracked + + +def test_list(index_arg): + l = [tracked, other] # $tracked + + l[0] # $ MISSING: tracked + l[1] + + # non-precise access is not supported right now (and it's not 100% clear if we want + # to support it, or if it will lead to bad results) + l[index_arg] + + for x in l: + print(x) + + for i in range(len(l)): + print(l[i]) diff --git a/python/ql/test/experimental/dataflow/typetracking/tracked.ql b/python/ql/test/experimental/dataflow/typetracking/tracked.ql index ca893688256c..8bad0e33ead8 100644 --- a/python/ql/test/experimental/dataflow/typetracking/tracked.ql +++ b/python/ql/test/experimental/dataflow/typetracking/tracked.ql @@ -30,6 +30,14 @@ module TrackedTest implements TestSig { not e instanceof DataFlow::ScopeEntryDefinitionNode and // ...same for `SynthCaptureNode`s not e instanceof DP::SynthCaptureNode and + // after starting to track all kinds of content, we generally just want to show + // annotations after reading the tracked data out again. 
(we keep the old + // attribute logic to not rewrite all our tests) + ( + t.getContent().isNone() + or + t.getContent().asSome() instanceof DataFlow::AttributeContent + ) and tag = "tracked" and location = e.getLocation() and value = t.getAttr() and From 2b09b084e0393193e5daf97a1a512538c9aa4b57 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Tue, 12 Mar 2024 17:43:43 +0100 Subject: [PATCH 14/23] Python: Add change-note --- python/ql/lib/change-notes/2024-03-12-typetracking-content.md | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 python/ql/lib/change-notes/2024-03-12-typetracking-content.md diff --git a/python/ql/lib/change-notes/2024-03-12-typetracking-content.md b/python/ql/lib/change-notes/2024-03-12-typetracking-content.md new file mode 100644 index 000000000000..5ad93a657aed --- /dev/null +++ b/python/ql/lib/change-notes/2024-03-12-typetracking-content.md @@ -0,0 +1,4 @@ +--- +category: minorAnalysis +--- +* Improved the type-tracking capabilities (and therefore also API graphs) to allow tracking items in tuples and dictionaries. 
From af8cef5b535b068057a58654701cf27ca2641e81 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Tue, 12 Mar 2024 17:57:32 +0100 Subject: [PATCH 15/23] Python: Fixup deprecated type-tracker API --- .../dataflow/new/internal/TypeTracker.qll | 46 +++++++++++++++---- .../new/internal/TypeTrackerSpecific.qll | 2 +- 2 files changed, 37 insertions(+), 11 deletions(-) diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/TypeTracker.qll b/python/ql/lib/semmle/python/dataflow/new/internal/TypeTracker.qll index 0f6ff8bd3bd2..01c881b23169 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/TypeTracker.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/TypeTracker.qll @@ -1,6 +1,7 @@ /** Step Summaries and Type Tracking */ private import TypeTrackerSpecific +private import semmle.python.dataflow.new.internal.DataFlowPublic as DataFlowPublic cached private module Cached { @@ -12,10 +13,22 @@ private module Cached { LevelStep() or CallStep() or ReturnStep() or - deprecated StoreStep(TypeTrackerContent content) { basicStoreStep(_, _, content) } or - deprecated LoadStep(TypeTrackerContent content) { basicLoadStep(_, _, content) } or + deprecated StoreStep(TypeTrackerContent content) { + exists(DataFlowPublic::AttributeContent dfc | dfc.getAttribute() = content | + basicStoreStep(_, _, dfc) + ) + } or + deprecated LoadStep(TypeTrackerContent content) { + exists(DataFlowPublic::AttributeContent dfc | dfc.getAttribute() = content | + basicLoadStep(_, _, dfc) + ) + } or deprecated LoadStoreStep(TypeTrackerContent load, TypeTrackerContent store) { - basicLoadStoreStep(_, _, load, store) + exists(DataFlowPublic::AttributeContent dfcLoad, DataFlowPublic::AttributeContent dfcStore | + dfcLoad.getAttribute() = load and dfcStore.getAttribute() = store + | + basicLoadStoreStep(_, _, dfcLoad, dfcStore) + ) } or deprecated WithContent(ContentFilter filter) { basicWithContentStep(_, _, filter) } or deprecated WithoutContent(ContentFilter filter) { 
basicWithoutContentStep(_, _, filter) } or @@ -29,13 +42,13 @@ private module Cached { // Restrict `content` to those that might eventually match a load. // We can't rely on `basicStoreStep` since `startInContent` might be used with // a content that has no corresponding store. - exists(TypeTrackerContent loadContents | + exists(DataFlowPublic::AttributeContent loadContents | ( basicLoadStep(_, _, loadContents) or basicLoadStoreStep(_, _, loadContents, _) ) and - compatibleContents(content, loadContents) + compatibleContents(content, loadContents.getAttribute()) ) } @@ -45,13 +58,13 @@ private module Cached { content = noContent() or // As in MkTypeTracker, restrict `content` to those that might eventually match a store. - exists(TypeTrackerContent storeContent | + exists(DataFlowPublic::AttributeContent storeContent | ( basicStoreStep(_, _, storeContent) or basicLoadStoreStep(_, _, _, storeContent) ) and - compatibleContents(storeContent, content) + compatibleContents(storeContent.getAttribute(), content) ) } @@ -198,7 +211,10 @@ private module Cached { flowsToStoreStep(nodeFrom, nodeTo, content) and summary = StoreStep(content) or - basicLoadStep(nodeFrom, nodeTo, content) and summary = LoadStep(content) + exists(DataFlowPublic::AttributeContent dfc | dfc.getAttribute() = content | + basicLoadStep(nodeFrom, nodeTo, dfc) + ) and + summary = LoadStep(content) ) or exists(TypeTrackerContent loadContent, TypeTrackerContent storeContent | @@ -281,7 +297,12 @@ deprecated private predicate smallstepProj(Node nodeFrom, StepSummary summary) { deprecated private predicate flowsToStoreStep( Node nodeFrom, TypeTrackingNode nodeTo, TypeTrackerContent content ) { - exists(Node obj | nodeTo.flowsTo(obj) and basicStoreStep(nodeFrom, obj, content)) + exists(Node obj | + nodeTo.flowsTo(obj) and + exists(DataFlowPublic::AttributeContent dfc | dfc.getAttribute() = content | + basicStoreStep(nodeFrom, obj, dfc) + ) + ) } /** @@ -292,7 +313,12 @@ deprecated private predicate 
flowsToLoadStoreStep( TypeTrackerContent storeContent ) { exists(Node obj | - nodeTo.flowsTo(obj) and basicLoadStoreStep(nodeFrom, obj, loadContent, storeContent) + nodeTo.flowsTo(obj) and + exists(DataFlowPublic::AttributeContent loadDfc, DataFlowPublic::AttributeContent storeDfc | + loadDfc.getAttribute() = loadContent and storeDfc.getAttribute() = storeContent + | + basicLoadStoreStep(nodeFrom, obj, loadDfc, storeDfc) + ) ) } diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackerSpecific.qll b/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackerSpecific.qll index c31cfeb53310..11cce1446f75 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackerSpecific.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackerSpecific.qll @@ -15,7 +15,7 @@ deprecated class OptionalTypeTrackerContent extends string { OptionalTypeTrackerContent() { this = "" or - this instanceof TypeTrackingImpl::TypeTrackingInput::Content + this = any(DataFlowPublic::AttributeContent dfc).getAttribute() } } From 6ffaad1bc8cfb0812549780d6547c8e37a2dcea9 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Tue, 12 Mar 2024 22:32:40 +0100 Subject: [PATCH 16/23] Python: Expand type-tracking tests with nested tuples I was initially surprised to see that this didn't work, until I remembered that type-tracking only works with content of depth 1. 
--- .../TypeTrackingConsistency.expected | 13 +++++++++++ .../dataflow/typetracking/content_test.py | 22 +++++++++++++++++++ 2 files changed, 35 insertions(+) create mode 100644 python/ql/test/experimental/dataflow/typetracking/CONSISTENCY/TypeTrackingConsistency.expected diff --git a/python/ql/test/experimental/dataflow/typetracking/CONSISTENCY/TypeTrackingConsistency.expected b/python/ql/test/experimental/dataflow/typetracking/CONSISTENCY/TypeTrackingConsistency.expected new file mode 100644 index 000000000000..6fc4df916995 --- /dev/null +++ b/python/ql/test/experimental/dataflow/typetracking/CONSISTENCY/TypeTrackingConsistency.expected @@ -0,0 +1,13 @@ +unreachableNode +| content_test.py:31:6:31:11 | ControlFlowNode for Tuple | Unreachable node in step of kind load Tuple element at index 0. | +| content_test.py:31:6:31:11 | ControlFlowNode for Tuple | Unreachable node in step of kind load Tuple element at index 1. | +| content_test.py:31:6:31:11 | ControlFlowNode for Tuple | Unreachable node in step of kind storeTarget. | +| content_test.py:31:16:31:21 | ControlFlowNode for Tuple | Unreachable node in step of kind load Tuple element at index 0. | +| content_test.py:31:16:31:21 | ControlFlowNode for Tuple | Unreachable node in step of kind load Tuple element at index 1. | +| content_test.py:31:16:31:21 | ControlFlowNode for Tuple | Unreachable node in step of kind storeTarget. | +| content_test.py:40:10:40:13 | ControlFlowNode for Tuple | Unreachable node in step of kind load Tuple element at index 0. | +| content_test.py:40:10:40:13 | ControlFlowNode for Tuple | Unreachable node in step of kind load Tuple element at index 1. | +| content_test.py:40:10:40:13 | ControlFlowNode for Tuple | Unreachable node in step of kind storeTarget. | +| content_test.py:66:9:66:12 | ControlFlowNode for Tuple | Unreachable node in step of kind load Tuple element at index 0. 
| +| content_test.py:66:9:66:12 | ControlFlowNode for Tuple | Unreachable node in step of kind load Tuple element at index 1. | +| content_test.py:66:9:66:12 | ControlFlowNode for Tuple | Unreachable node in step of kind storeTarget. | diff --git a/python/ql/test/experimental/dataflow/typetracking/content_test.py b/python/ql/test/experimental/dataflow/typetracking/content_test.py index ac201f233013..1c52d659582a 100644 --- a/python/ql/test/experimental/dataflow/typetracking/content_test.py +++ b/python/ql/test/experimental/dataflow/typetracking/content_test.py @@ -21,6 +21,28 @@ def test_tuple(index_arg): print(tup[i]) + # nested tuples + nested_tuples = ((tracked, other), (other, tracked)) # $tracked + + nested_tuples[0][0] # $ MISSING: tracked + nested_tuples[0][1] + nested_tuples[1][0] + nested_tuples[1][1] # $ MISSING: tracked + + (aa, ab), (ba, bb) = nested_tuples + aa # $ MISSING: tracked + ab + ba + bb # $ MISSING: tracked + + + # non-precise access is not supported right now (and it's not 100% clear if we want + # to support it, or if it will lead to bad results) + for (x, y) in nested_tuples: + x + y + + def test_dict(key_arg): d1 = {"t": tracked, "o": other} # $tracked d1["t"] # $ tracked From 7a3ee0f5f8145abe25b26a64415a43ed32b7016e Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Wed, 13 Mar 2024 16:41:42 +0100 Subject: [PATCH 17/23] Python: Make `IterableSequenceNode` LocalSourceNode We do this to remove the inconsistencies, and to be ready for a future where type-tracking supports content of depth > 1. 
It works because targets of loadSteps need to be LocalSourceNodes: predicate loadStep(Node nodeFrom, LocalSourceNode nodeTo, Content content) { --- .../python/dataflow/new/internal/LocalSources.qll | 2 ++ .../CONSISTENCY/TypeTrackingConsistency.expected | 13 ------------- 2 files changed, 2 insertions(+), 13 deletions(-) delete mode 100644 python/ql/test/experimental/dataflow/typetracking/CONSISTENCY/TypeTrackingConsistency.expected diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/LocalSources.qll b/python/ql/lib/semmle/python/dataflow/new/internal/LocalSources.qll index 34b137b35115..92d9e5887ad8 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/LocalSources.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/LocalSources.qll @@ -74,6 +74,8 @@ class LocalSourceNode extends Node { this instanceof ScopeEntryDefinitionNode or this instanceof ParameterNode + or + this instanceof IterableSequenceNode } /** Holds if this `LocalSourceNode` can flow to `nodeTo` in one or more local flow steps. */ diff --git a/python/ql/test/experimental/dataflow/typetracking/CONSISTENCY/TypeTrackingConsistency.expected b/python/ql/test/experimental/dataflow/typetracking/CONSISTENCY/TypeTrackingConsistency.expected deleted file mode 100644 index 6fc4df916995..000000000000 --- a/python/ql/test/experimental/dataflow/typetracking/CONSISTENCY/TypeTrackingConsistency.expected +++ /dev/null @@ -1,13 +0,0 @@ -unreachableNode -| content_test.py:31:6:31:11 | ControlFlowNode for Tuple | Unreachable node in step of kind load Tuple element at index 0. | -| content_test.py:31:6:31:11 | ControlFlowNode for Tuple | Unreachable node in step of kind load Tuple element at index 1. | -| content_test.py:31:6:31:11 | ControlFlowNode for Tuple | Unreachable node in step of kind storeTarget. | -| content_test.py:31:16:31:21 | ControlFlowNode for Tuple | Unreachable node in step of kind load Tuple element at index 0. 
| -| content_test.py:31:16:31:21 | ControlFlowNode for Tuple | Unreachable node in step of kind load Tuple element at index 1. | -| content_test.py:31:16:31:21 | ControlFlowNode for Tuple | Unreachable node in step of kind storeTarget. | -| content_test.py:40:10:40:13 | ControlFlowNode for Tuple | Unreachable node in step of kind load Tuple element at index 0. | -| content_test.py:40:10:40:13 | ControlFlowNode for Tuple | Unreachable node in step of kind load Tuple element at index 1. | -| content_test.py:40:10:40:13 | ControlFlowNode for Tuple | Unreachable node in step of kind storeTarget. | -| content_test.py:66:9:66:12 | ControlFlowNode for Tuple | Unreachable node in step of kind load Tuple element at index 0. | -| content_test.py:66:9:66:12 | ControlFlowNode for Tuple | Unreachable node in step of kind load Tuple element at index 1. | -| content_test.py:66:9:66:12 | ControlFlowNode for Tuple | Unreachable node in step of kind storeTarget. | From 00f2a6a65e76336e0cd926a1630dd0001ed3bd14 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Fri, 15 Mar 2024 10:14:45 +0100 Subject: [PATCH 18/23] Python: Update ssa-compute test expectations --- .../CONSISTENCY/TypeTrackingConsistency.expected | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python/ql/test/library-tests/essa/ssa-compute/CONSISTENCY/TypeTrackingConsistency.expected b/python/ql/test/library-tests/essa/ssa-compute/CONSISTENCY/TypeTrackingConsistency.expected index 81d19f3f20d7..0e829fd207f0 100644 --- a/python/ql/test/library-tests/essa/ssa-compute/CONSISTENCY/TypeTrackingConsistency.expected +++ b/python/ql/test/library-tests/essa/ssa-compute/CONSISTENCY/TypeTrackingConsistency.expected @@ -1,6 +1,6 @@ unreachableNode -| test2.py:16:17:16:17 | ControlFlowNode for y | Unreachable node in step of kind load bar. | -| test2.py:25:23:25:23 | ControlFlowNode for x | Unreachable node in step of kind load attribute. 
| +| test2.py:16:17:16:17 | ControlFlowNode for y | Unreachable node in step of kind load Attribute bar. | +| test2.py:25:23:25:23 | ControlFlowNode for x | Unreachable node in step of kind load Attribute attribute. | | test2.py:25:23:25:23 | ControlFlowNode for x | Unreachable node in step of kind simpleLocalSmallStep. | -| test2.py:26:17:26:17 | ControlFlowNode for y | Unreachable node in step of kind load bar. | +| test2.py:26:17:26:17 | ControlFlowNode for y | Unreachable node in step of kind load Attribute bar. | | test2.py:27:23:27:23 | ControlFlowNode for x | Unreachable node in step of kind simpleLocalSmallStep. | From 6babb2ff909d750c908ed3f2faadc8a5285fe81e Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Fri, 15 Mar 2024 10:24:33 +0100 Subject: [PATCH 19/23] Python: Accept .expected for `typetracking-summaries` --- .../experimental/dataflow/typetracking-summaries/summaries.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/ql/test/experimental/dataflow/typetracking-summaries/summaries.py b/python/ql/test/experimental/dataflow/typetracking-summaries/summaries.py index e11f451b8652..89b5e1756d5c 100644 --- a/python/ql/test/experimental/dataflow/typetracking-summaries/summaries.py +++ b/python/ql/test/experimental/dataflow/typetracking-summaries/summaries.py @@ -41,8 +41,8 @@ def explicit_identity(x): tms # $ MISSING: tracked another_tainted_list = TTS_append_to_list([], tracked) # $ tracked -atl = another_tainted_list[0] -atl # $ MISSING: tracked +atl = another_tainted_list[0] # $ tracked +atl # $ tracked # This will not work, as the call is not found by `getACallSimple`. 
from json import loads as json_loads From 7eb4419342e08b9f537cd75e1db2e0f5693c55fe Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Fri, 15 Mar 2024 10:24:57 +0100 Subject: [PATCH 20/23] Python: Restrict type-tracking content to only be precise At least for now :) --- .../dataflow/new/internal/TypeTrackingImpl.qll | 16 +++++++++++++++- .../dataflow/typetracking-summaries/summaries.py | 4 ++-- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackingImpl.qll b/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackingImpl.qll index 8b3e1a95ef10..ce95a6cca4e2 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackingImpl.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackingImpl.qll @@ -102,7 +102,21 @@ module TypeTrackingInput implements Shared::TypeTrackingInput { class LocalSourceNode = DataFlowPublic::LocalSourceNode; - class Content = DataFlowPublic::Content; + class Content extends DataFlowPublic::Content { + Content() { + // TODO: for now, it's not 100% clear if we should support non-precise content in + // type-tracking, or if it will lead to bad results. We start with only allowing + // precise content, which should always be a good improvement! It also simplifies + // the process of examining new results from non-precise content steps in the + // future, since you will _only_ have to look over the results from the new + // non-precise steps. 
+ this instanceof DataFlowPublic::AttributeContent + or + this instanceof DataFlowPublic::DictionaryElementContent + or + this instanceof DataFlowPublic::TupleElementContent + } + } /** * A label to use for `WithContent` and `WithoutContent` steps, restricting diff --git a/python/ql/test/experimental/dataflow/typetracking-summaries/summaries.py b/python/ql/test/experimental/dataflow/typetracking-summaries/summaries.py index 89b5e1756d5c..e11f451b8652 100644 --- a/python/ql/test/experimental/dataflow/typetracking-summaries/summaries.py +++ b/python/ql/test/experimental/dataflow/typetracking-summaries/summaries.py @@ -41,8 +41,8 @@ def explicit_identity(x): tms # $ MISSING: tracked another_tainted_list = TTS_append_to_list([], tracked) # $ tracked -atl = another_tainted_list[0] # $ tracked -atl # $ tracked +atl = another_tainted_list[0] +atl # $ MISSING: tracked # This will not work, as the call is not found by `getACallSimple`. from json import loads as json_loads From 20202aba908a7d0a3e6fc3aa64d2991e838e8768 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Tue, 2 Apr 2024 13:21:46 +0200 Subject: [PATCH 21/23] Python: Deprecate `AttributeName` --- python/ql/lib/semmle/python/dataflow/new/TypeTracking.qll | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/python/ql/lib/semmle/python/dataflow/new/TypeTracking.qll b/python/ql/lib/semmle/python/dataflow/new/TypeTracking.qll index 9d0bcb3c487a..8d1c691915b3 100644 --- a/python/ql/lib/semmle/python/dataflow/new/TypeTracking.qll +++ b/python/ql/lib/semmle/python/dataflow/new/TypeTracking.qll @@ -7,8 +7,12 @@ private import internal.TypeTrackingImpl as Impl import Impl::Shared::TypeTracking private import semmle.python.dataflow.new.internal.DataFlowPublic as DataFlowPublic -/** A string that may appear as the name of an attribute or access path. */ -class AttributeName = Impl::TypeTrackingInput::Content; +/** + * DEPRECATED. 
+ * + * A string that may appear as the name of an attribute or access path. + */ +deprecated class AttributeName = Impl::TypeTrackingInput::Content; /** * A summary of the steps needed to track a value to a given dataflow node. From 8707a63edb9440890234bc4ab0019c099087f56f Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Tue, 2 Apr 2024 13:26:26 +0200 Subject: [PATCH 22/23] Python: Add comments around `storeStepCommon` --- .../python/dataflow/new/internal/DataFlowPrivate.qll | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll index f2a523775443..1ad6d0f7e6ed 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll @@ -643,6 +643,13 @@ predicate jumpStepNotSharedWithTypeTracker(Node nodeFrom, Node nodeTo) { //-------- /** * Subset of `storeStep` that should be shared with type-tracking. + * + * NOTE: This does not include attributeStoreStep right now, since it has its own + * modeling in the type-tracking library (which is slightly different due to + * PostUpdateNodes). + * + * As of 2024-04-02 the type-tracking library only supports precise content, so there is + * no reason to include steps for list content right now. */ predicate storeStepCommon(Node nodeFrom, ContentSet c, Node nodeTo) { tupleStoreStep(nodeFrom, c, nodeTo) From a22b9947c02eee14b05205ab7c520a2add13de5b Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Tue, 2 Apr 2024 14:52:36 +0200 Subject: [PATCH 23/23] Python: Revert `IterableSequenceNode` as LocalSourceNode When looking things over a bit more, we could actually exclude the steps that would never be used instead. 
A much more involved solution, but more performance oriented and clear in terms of what is supported (at least until we start supporting type-tracking with more than depth 1 access-path, if that ever happens) --- .../TypeTrackingConsistency.ql | 4 -- .../dataflow/new/internal/LocalSources.qll | 2 - .../new/internal/TypeTrackingImpl.qll | 45 +++++++++++++++++-- 3 files changed, 42 insertions(+), 9 deletions(-) diff --git a/python/ql/consistency-queries/TypeTrackingConsistency.ql b/python/ql/consistency-queries/TypeTrackingConsistency.ql index 551573a7aef7..645bdef52194 100644 --- a/python/ql/consistency-queries/TypeTrackingConsistency.ql +++ b/python/ql/consistency-queries/TypeTrackingConsistency.ql @@ -27,10 +27,6 @@ private module ConsistencyChecksInput implements ConsistencyChecksInputSig { TypeTrackingInput::simpleLocalSmallStep*(m, n) ) or - // TODO: when adding support for proper content, handle iterable unpacking better - // such as `for k,v in items:`, or `a, (b,c) = ...` - n instanceof DataFlow::IterableSequenceNode - or // We have missing use-use flow in // https://github.com/python/cpython/blob/0fb18b02c8ad56299d6a2910be0bab8ad601ef24/Lib/socketserver.py#L276-L303 // which I couldn't just fix. We ignore the problems here, and instead rely on the diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/LocalSources.qll b/python/ql/lib/semmle/python/dataflow/new/internal/LocalSources.qll index 92d9e5887ad8..34b137b35115 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/LocalSources.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/LocalSources.qll @@ -74,8 +74,6 @@ class LocalSourceNode extends Node { this instanceof ScopeEntryDefinitionNode or this instanceof ParameterNode - or - this instanceof IterableSequenceNode } /** Holds if this `LocalSourceNode` can flow to `nodeTo` in one or more local flow steps. 
*/ diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackingImpl.qll b/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackingImpl.qll index ce95a6cca4e2..42ce5cdd2377 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackingImpl.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackingImpl.qll @@ -8,6 +8,7 @@ private import semmle.python.dataflow.new.internal.DataFlowPrivate as DataFlowPr private import codeql.typetracking.internal.SummaryTypeTracker as SummaryTypeTracker private import semmle.python.dataflow.new.internal.FlowSummaryImpl as FlowSummaryImpl private import semmle.python.dataflow.new.internal.DataFlowDispatch as DataFlowDispatch +private import semmle.python.dataflow.new.internal.IterableUnpacking as IterableUnpacking private module SummaryTypeTrackerInput implements SummaryTypeTracker::Input { // Dataflow nodes @@ -135,7 +136,27 @@ module TypeTrackingInput implements Shared::TypeTrackingInput { } /** Holds if there is a simple local flow step from `nodeFrom` to `nodeTo` */ - predicate simpleLocalSmallStep = DataFlowPrivate::simpleLocalFlowStepForTypetracking/2; + predicate simpleLocalSmallStep(Node nodeFrom, Node nodeTo) { + DataFlowPrivate::simpleLocalFlowStepForTypetracking(nodeFrom, nodeTo) and + // for `for k,v in foo` no need to do local flow step from the synthetic sequence + // node for `k,v` to the tuple `k,v` -- since type-tracking only supports one level + // of content tracking, and one read-step from `foo` to the synthetic sequence + // node is already required, we can skip the flow step from the synthetic sequence node to the + // tuple itself, since the read-step from the tuple to the tuple elements will not + // matter. 
+ not ( + IterableUnpacking::iterableUnpackingForReadStep(_, _, nodeFrom) and + IterableUnpacking::iterableUnpackingTupleFlowStep(nodeFrom, nodeTo) + ) and + // for nested iterable unpacking, such as `[[a]] = foo` or `((a,b),) = bar`, we can + // ignore the flow steps from the synthetic sequence node to the real sequence node, + // since we only support one level of content in type-trackers, and the nested + // structure requires two levels at least to be useful. + not exists(SequenceNode outer | + outer.getAnElement() = nodeTo.asCfgNode() and + IterableUnpacking::iterableUnpackingTupleFlowStep(nodeFrom, nodeTo) + ) + } /** Holds if there is a level step from `nodeFrom` to `nodeTo`, which may depend on the call graph. */ predicate levelStepCall(Node nodeFrom, LocalSourceNode nodeTo) { none() } @@ -200,7 +221,10 @@ module TypeTrackingInput implements Shared::TypeTrackingInput { nodeTo = storeTarget or nodeTo = storeTarget.(DataFlowPrivate::SyntheticPostUpdateNode).getPreUpdateNode() - ) + ) and + // when only supporting precise content, no need for IterableElementNode (since it + // is only fed set/list content) + not nodeFrom instanceof DataFlowPublic::IterableElementNode or TypeTrackerSummaryFlow::basicStoreStep(nodeFrom, nodeTo, content) } @@ -216,7 +240,22 @@ module TypeTrackingInput implements Shared::TypeTrackingInput { nodeTo = a ) or - DataFlowPrivate::readStepCommon(nodeFrom, content, nodeTo) + DataFlowPrivate::readStepCommon(nodeFrom, content, nodeTo) and + // Since we only support one level of content in type-trackers we don't actually + // support `(aa, ab), (ba, bb) = ...`. Therefore we exclude the read-step from `(aa, + // ab)` to `aa` (since it is not needed). + not exists(SequenceNode outer | + outer.getAnElement() = nodeFrom.asCfgNode() and + IterableUnpacking::iterableUnpackingTupleFlowStep(_, nodeFrom) + ) and + // Again, due to only supporting one level deep, for `for (k,v) in ...` we exclude read-step from + // the tuple to `k` and `v`. 
+ not exists(DataFlowPublic::IterableSequenceNode seq, DataFlowPublic::IterableElementNode elem | + IterableUnpacking::iterableUnpackingForReadStep(_, _, seq) and + IterableUnpacking::iterableUnpackingConvertingReadStep(seq, _, elem) and + IterableUnpacking::iterableUnpackingConvertingStoreStep(elem, _, nodeFrom) and + nodeFrom.asCfgNode() instanceof SequenceNode + ) or TypeTrackerSummaryFlow::basicLoadStep(nodeFrom, nodeTo, content) }