Skip to content

Commit

Permalink
Merge pull request #15711 from RasmusWL/tt-content
Browse files Browse the repository at this point in the history
Python: Add type tracking for content
  • Loading branch information
yoff authored Apr 9, 2024
2 parents 44fba68 + a22b994 commit 1048cf7
Show file tree
Hide file tree
Showing 13 changed files with 359 additions and 83 deletions.
11 changes: 7 additions & 4 deletions python/ql/consistency-queries/TypeTrackingConsistency.ql
Original file line number Diff line number Diff line change
Expand Up @@ -27,15 +27,18 @@ private module ConsistencyChecksInput implements ConsistencyChecksInputSig {
TypeTrackingInput::simpleLocalSmallStep*(m, n)
)
or
// TODO: when adding support for proper content, handle iterable unpacking better
// such as `for k,v in items:`, or `a, (b,c) = ...`
n instanceof DataFlow::IterableSequenceNode
or
// We have missing use-use flow in
// https://github.com/python/cpython/blob/0fb18b02c8ad56299d6a2910be0bab8ad601ef24/Lib/socketserver.py#L276-L303
// which I couldn't just fix. We ignore the problems here, and instead rely on the
// test-case added in https://github.com/github/codeql/pull/15841
n.getLocation().getFile().getAbsolutePath().matches("%/socketserver.py")
or
// for iterable unpacking like `a,b = some_list`, we currently don't want to allow
// type-tracking... however, in the future when we allow tracking list indexes
// precisely (that is, move away from ListElementContent), we should ensure we have
// proper flow to the synthetic `IterableElementNode`.
exists(DataFlow::ListElementContent c) and
n instanceof DataFlow::IterableElementNode
}
}

Expand Down
4 changes: 4 additions & 0 deletions python/ql/lib/change-notes/2024-03-12-typetracking-content.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
---
category: minorAnalysis
---
* Improved the type-tracking capabilities (and therefore also API graphs) to allow tracking items in tuples and dictionaries.
22 changes: 15 additions & 7 deletions python/ql/lib/semmle/python/dataflow/new/TypeTracking.qll
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,14 @@

private import internal.TypeTrackingImpl as Impl
import Impl::Shared::TypeTracking<Impl::TypeTrackingInput>
private import semmle.python.dataflow.new.internal.DataFlowPublic as DataFlowPublic

/** A string that may appear as the name of an attribute or access path. */
class AttributeName = Impl::TypeTrackingInput::Content;
/**
* DEPRECATED.
*
* A string that may appear as the name of an attribute or access path.
*/
deprecated class AttributeName = Impl::TypeTrackingInput::Content;

/**
* A summary of the steps needed to track a value to a given dataflow node.
Expand Down Expand Up @@ -40,17 +45,20 @@ class TypeTracker extends Impl::TypeTracker {
* Holds if this is the starting point of type tracking, and the value starts in the attribute named `attrName`.
* The type tracking only ends after the attribute has been loaded.
*/
predicate startInAttr(string attrName) { this.startInContent(attrName) }
predicate startInAttr(string attrName) {
exists(DataFlowPublic::AttributeContent content | content.getAttribute() = attrName |
this.startInContent(content)
)
}

/**
* INTERNAL. DO NOT USE.
*
* Gets the attribute associated with this type tracker.
*/
string getAttr() {
result = this.getContent().asSome()
or
this.getContent().isNone() and
result = ""
if this.getContent().asSome() instanceof DataFlowPublic::AttributeContent
then result = this.getContent().asSome().(DataFlowPublic::AttributeContent).getAttribute()
else result = ""
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -641,25 +641,39 @@ predicate jumpStepNotSharedWithTypeTracker(Node nodeFrom, Node nodeTo) {
//--------
// Field flow
//--------
/**
* Subset of `storeStep` that should be shared with type-tracking.
*
* NOTE: This does not include attributeStoreStep right now, since it has its' own
* modeling in the type-tracking library (which is slightly different due to
* PostUpdateNodes).
*
* As of 2024-04-02 the type-tracking library only supports precise content, so there is
* no reason to include steps for list content right now.
*/
predicate storeStepCommon(Node nodeFrom, ContentSet c, Node nodeTo) {
tupleStoreStep(nodeFrom, c, nodeTo)
or
dictStoreStep(nodeFrom, c, nodeTo)
or
moreDictStoreSteps(nodeFrom, c, nodeTo)
or
iterableUnpackingStoreStep(nodeFrom, c, nodeTo)
}

/**
* Holds if data can flow from `nodeFrom` to `nodeTo` via an assignment to
* content `c`.
*/
predicate storeStep(Node nodeFrom, ContentSet c, Node nodeTo) {
storeStepCommon(nodeFrom, c, nodeTo)
or
listStoreStep(nodeFrom, c, nodeTo)
or
setStoreStep(nodeFrom, c, nodeTo)
or
tupleStoreStep(nodeFrom, c, nodeTo)
or
dictStoreStep(nodeFrom, c, nodeTo)
or
moreDictStoreSteps(nodeFrom, c, nodeTo)
or
comprehensionStoreStep(nodeFrom, c, nodeTo)
or
iterableUnpackingStoreStep(nodeFrom, c, nodeTo)
or
attributeStoreStep(nodeFrom, c, nodeTo)
or
matchStoreStep(nodeFrom, c, nodeTo)
Expand Down Expand Up @@ -892,12 +906,19 @@ predicate attributeStoreStep(Node nodeFrom, AttributeContent c, Node nodeTo) {
}

/**
* Holds if data can flow from `nodeFrom` to `nodeTo` via a read of content `c`.
* Subset of `readStep` that should be shared with type-tracking.
*/
predicate readStep(Node nodeFrom, ContentSet c, Node nodeTo) {
predicate readStepCommon(Node nodeFrom, ContentSet c, Node nodeTo) {
subscriptReadStep(nodeFrom, c, nodeTo)
or
iterableUnpackingReadStep(nodeFrom, c, nodeTo)
}

/**
* Holds if data can flow from `nodeFrom` to `nodeTo` via a read of content `c`.
*/
predicate readStep(Node nodeFrom, ContentSet c, Node nodeTo) {
readStepCommon(nodeFrom, c, nodeTo)
or
matchReadStep(nodeFrom, c, nodeTo)
or
Expand Down
46 changes: 36 additions & 10 deletions python/ql/lib/semmle/python/dataflow/new/internal/TypeTracker.qll
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
/** Step Summaries and Type Tracking */

private import TypeTrackerSpecific
private import semmle.python.dataflow.new.internal.DataFlowPublic as DataFlowPublic

cached
private module Cached {
Expand All @@ -12,10 +13,22 @@ private module Cached {
LevelStep() or
CallStep() or
ReturnStep() or
deprecated StoreStep(TypeTrackerContent content) { basicStoreStep(_, _, content) } or
deprecated LoadStep(TypeTrackerContent content) { basicLoadStep(_, _, content) } or
deprecated StoreStep(TypeTrackerContent content) {
exists(DataFlowPublic::AttributeContent dfc | dfc.getAttribute() = content |
basicStoreStep(_, _, dfc)
)
} or
deprecated LoadStep(TypeTrackerContent content) {
exists(DataFlowPublic::AttributeContent dfc | dfc.getAttribute() = content |
basicLoadStep(_, _, dfc)
)
} or
deprecated LoadStoreStep(TypeTrackerContent load, TypeTrackerContent store) {
basicLoadStoreStep(_, _, load, store)
exists(DataFlowPublic::AttributeContent dfcLoad, DataFlowPublic::AttributeContent dfcStore |
dfcLoad.getAttribute() = load and dfcStore.getAttribute() = store
|
basicLoadStoreStep(_, _, dfcLoad, dfcStore)
)
} or
deprecated WithContent(ContentFilter filter) { basicWithContentStep(_, _, filter) } or
deprecated WithoutContent(ContentFilter filter) { basicWithoutContentStep(_, _, filter) } or
Expand All @@ -29,13 +42,13 @@ private module Cached {
// Restrict `content` to those that might eventually match a load.
// We can't rely on `basicStoreStep` since `startInContent` might be used with
// a content that has no corresponding store.
exists(TypeTrackerContent loadContents |
exists(DataFlowPublic::AttributeContent loadContents |
(
basicLoadStep(_, _, loadContents)
or
basicLoadStoreStep(_, _, loadContents, _)
) and
compatibleContents(content, loadContents)
compatibleContents(content, loadContents.getAttribute())
)
}

Expand All @@ -45,13 +58,13 @@ private module Cached {
content = noContent()
or
// As in MkTypeTracker, restrict `content` to those that might eventually match a store.
exists(TypeTrackerContent storeContent |
exists(DataFlowPublic::AttributeContent storeContent |
(
basicStoreStep(_, _, storeContent)
or
basicLoadStoreStep(_, _, _, storeContent)
) and
compatibleContents(storeContent, content)
compatibleContents(storeContent.getAttribute(), content)
)
}

Expand Down Expand Up @@ -198,7 +211,10 @@ private module Cached {
flowsToStoreStep(nodeFrom, nodeTo, content) and
summary = StoreStep(content)
or
basicLoadStep(nodeFrom, nodeTo, content) and summary = LoadStep(content)
exists(DataFlowPublic::AttributeContent dfc | dfc.getAttribute() = content |
basicLoadStep(nodeFrom, nodeTo, dfc)
) and
summary = LoadStep(content)
)
or
exists(TypeTrackerContent loadContent, TypeTrackerContent storeContent |
Expand Down Expand Up @@ -281,7 +297,12 @@ deprecated private predicate smallstepProj(Node nodeFrom, StepSummary summary) {
deprecated private predicate flowsToStoreStep(
Node nodeFrom, TypeTrackingNode nodeTo, TypeTrackerContent content
) {
exists(Node obj | nodeTo.flowsTo(obj) and basicStoreStep(nodeFrom, obj, content))
exists(Node obj |
nodeTo.flowsTo(obj) and
exists(DataFlowPublic::AttributeContent dfc | dfc.getAttribute() = content |
basicStoreStep(nodeFrom, obj, dfc)
)
)
}

/**
Expand All @@ -292,7 +313,12 @@ deprecated private predicate flowsToLoadStoreStep(
TypeTrackerContent storeContent
) {
exists(Node obj |
nodeTo.flowsTo(obj) and basicLoadStoreStep(nodeFrom, obj, loadContent, storeContent)
nodeTo.flowsTo(obj) and
exists(DataFlowPublic::AttributeContent loadDfc, DataFlowPublic::AttributeContent storeDfc |
loadDfc.getAttribute() = loadContent and storeDfc.getAttribute() = storeContent
|
basicLoadStoreStep(nodeFrom, obj, loadDfc, storeDfc)
)
)
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ deprecated class OptionalTypeTrackerContent extends string {
OptionalTypeTrackerContent() {
this = ""
or
this instanceof TypeTrackingImpl::TypeTrackingInput::Content
this = any(DataFlowPublic::AttributeContent dfc).getAttribute()
}
}

Expand Down
Loading

0 comments on commit 1048cf7

Please sign in to comment.