Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions python/ql/consistency-queries/DataFlowConsistency.ql
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,6 @@ private module Input implements InputSig<Location, PythonDataFlow> {
// parameter, but dataflow-consistency queries should _not_ complain about there not
// being a post-update node for the synthetic `**kwargs` parameter.
n instanceof SynthDictSplatParameterNode
or
Private::Conversions::readStep(n, _, _)
}

predicate uniqueParameterNodePositionExclude(DataFlowCallable c, ParameterPosition pos, Node p) {
Expand Down
202 changes: 49 additions & 153 deletions python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll
Original file line number Diff line number Diff line change
Expand Up @@ -753,7 +753,7 @@ predicate jumpStepNotSharedWithTypeTracker(Node nodeFrom, Node nodeTo) {
* As of 2024-04-02 the type-tracking library only supports precise content, so there is
* no reason to include steps for list content right now.
*/
predicate storeStepCommon(Node nodeFrom, Content c, Node nodeTo) {
predicate storeStepCommon(Node nodeFrom, ContentSet c, Node nodeTo) {
tupleStoreStep(nodeFrom, c, nodeTo)
or
dictStoreStep(nodeFrom, c, nodeTo)
Expand All @@ -767,31 +767,29 @@ predicate storeStepCommon(Node nodeFrom, Content c, Node nodeTo) {
* Holds if data can flow from `nodeFrom` to `nodeTo` via an assignment to
* content `c`.
*/
predicate storeStep(Node nodeFrom, ContentSet cs, Node nodeTo) {
exists(Content c | cs = singleton(c) |
storeStepCommon(nodeFrom, c, nodeTo)
or
listStoreStep(nodeFrom, c, nodeTo)
or
setStoreStep(nodeFrom, c, nodeTo)
or
attributeStoreStep(nodeFrom, c, nodeTo)
or
matchStoreStep(nodeFrom, c, nodeTo)
or
any(Orm::AdditionalOrmSteps es).storeStep(nodeFrom, c, nodeTo)
or
synthStarArgsElementParameterNodeStoreStep(nodeFrom, c, nodeTo)
or
synthDictSplatArgumentNodeStoreStep(nodeFrom, c, nodeTo)
or
yieldStoreStep(nodeFrom, c, nodeTo)
or
VariableCapture::storeStep(nodeFrom, c, nodeTo)
)
predicate storeStep(Node nodeFrom, ContentSet c, Node nodeTo) {
storeStepCommon(nodeFrom, c, nodeTo)
or
FlowSummaryImpl::Private::Steps::summaryStoreStep(nodeFrom.(FlowSummaryNode).getSummaryNode(), cs,
listStoreStep(nodeFrom, c, nodeTo)
or
setStoreStep(nodeFrom, c, nodeTo)
or
attributeStoreStep(nodeFrom, c, nodeTo)
or
matchStoreStep(nodeFrom, c, nodeTo)
or
any(Orm::AdditionalOrmSteps es).storeStep(nodeFrom, c, nodeTo)
or
FlowSummaryImpl::Private::Steps::summaryStoreStep(nodeFrom.(FlowSummaryNode).getSummaryNode(), c,
nodeTo.(FlowSummaryNode).getSummaryNode())
or
synthStarArgsElementParameterNodeStoreStep(nodeFrom, c, nodeTo)
or
synthDictSplatArgumentNodeStoreStep(nodeFrom, c, nodeTo)
or
yieldStoreStep(nodeFrom, c, nodeTo)
or
VariableCapture::storeStep(nodeFrom, c, nodeTo)
}

/**
Expand Down Expand Up @@ -987,7 +985,7 @@ predicate attributeStoreStep(Node nodeFrom, AttributeContent c, Node nodeTo) {
/**
* Subset of `readStep` that should be shared with type-tracking.
*/
predicate readStepCommon(Node nodeFrom, Content c, Node nodeTo) {
predicate readStepCommon(Node nodeFrom, ContentSet c, Node nodeTo) {
subscriptReadStep(nodeFrom, c, nodeTo)
or
iterableUnpackingReadStep(nodeFrom, c, nodeTo)
Expand All @@ -996,25 +994,21 @@ predicate readStepCommon(Node nodeFrom, Content c, Node nodeTo) {
/**
* Holds if data can flow from `nodeFrom` to `nodeTo` via a read of content `c`.
*/
predicate readStep(Node nodeFrom, ContentSet cs, Node nodeTo) {
exists(Content c | cs = singleton(c) |
readStepCommon(nodeFrom, c, nodeTo)
or
matchReadStep(nodeFrom, c, nodeTo)
or
forReadStep(nodeFrom, c, nodeTo)
or
attributeReadStep(nodeFrom, c, nodeTo)
or
synthDictSplatParameterNodeReadStep(nodeFrom, c, nodeTo)
or
VariableCapture::readStep(nodeFrom, c, nodeTo)
)
predicate readStep(Node nodeFrom, ContentSet c, Node nodeTo) {
readStepCommon(nodeFrom, c, nodeTo)
or
matchReadStep(nodeFrom, c, nodeTo)
or
forReadStep(nodeFrom, c, nodeTo)
or
FlowSummaryImpl::Private::Steps::summaryReadStep(nodeFrom.(FlowSummaryNode).getSummaryNode(), cs,
attributeReadStep(nodeFrom, c, nodeTo)
or
FlowSummaryImpl::Private::Steps::summaryReadStep(nodeFrom.(FlowSummaryNode).getSummaryNode(), c,
nodeTo.(FlowSummaryNode).getSummaryNode())
or
Conversions::readStep(nodeFrom, cs, nodeTo)
synthDictSplatParameterNodeReadStep(nodeFrom, c, nodeTo)
or
VariableCapture::readStep(nodeFrom, c, nodeTo)
}

/** Data flows from a sequence to a subscript of the sequence. */
Expand Down Expand Up @@ -1070,68 +1064,23 @@ predicate attributeReadStep(Node nodeFrom, AttributeContent c, AttrRead nodeTo)
nodeTo.accesses(nodeFrom, c.getAttribute())
}

module Conversions {
private import semmle.python.Concepts

predicate decoderReadStep(Node nodeFrom, ContentSet c, Node nodeTo) {
exists(Decoding decoding |
nodeFrom = decoding.getAnInput() and
nodeTo = decoding.getOutput()
) and
c.isAnyTupleOrDictionaryElement()
}

predicate encoderReadStep(Node nodeFrom, ContentSet c, Node nodeTo) {
exists(Encoding encoding |
nodeFrom = encoding.getAnInput() and
nodeTo = encoding.getOutput()
) and
c.isAnyTupleOrDictionaryElement()
}

predicate formatReadStep(Node nodeFrom, ContentSet c, Node nodeTo) {
// % formatting
exists(BinaryExprNode fmt | fmt = nodeTo.asCfgNode() |
fmt.getOp() instanceof Mod and
fmt.getRight() = nodeFrom.asCfgNode()
) and
c.isAnyTupleElement()
or
// format_map
// see https://docs.python.org/3/library/stdtypes.html#str.format_map
nodeTo.(MethodCallNode).calls(_, "format_map") and
nodeTo.(MethodCallNode).getArg(0) = nodeFrom and
c.isAnyDictionaryElement()
}

predicate readStep(Node nodeFrom, ContentSet c, Node nodeTo) {
decoderReadStep(nodeFrom, c, nodeTo)
or
encoderReadStep(nodeFrom, c, nodeTo)
or
formatReadStep(nodeFrom, c, nodeTo)
}
}

/**
* Holds if values stored inside content `c` are cleared at node `n`. For example,
* any value stored inside `f` is cleared at the pre-update node associated with `x`
* in `x.f = newValue`.
*/
predicate clearsContent(Node n, ContentSet cs) {
exists(Content c | cs = singleton(c) |
matchClearStep(n, c)
or
attributeClearStep(n, c)
or
dictClearStep(n, c)
or
dictSplatParameterNodeClearStep(n, c)
or
VariableCapture::clearsContent(n, c)
)
predicate clearsContent(Node n, ContentSet c) {
matchClearStep(n, c)
or
attributeClearStep(n, c)
or
dictClearStep(n, c)
or
FlowSummaryImpl::Private::Steps::summaryClearsContent(n.(FlowSummaryNode).getSummaryNode(), c)
or
FlowSummaryImpl::Private::Steps::summaryClearsContent(n.(FlowSummaryNode).getSummaryNode(), cs)
dictSplatParameterNodeClearStep(n, c)
or
VariableCapture::clearsContent(n, c)
}

/**
Expand Down Expand Up @@ -1249,65 +1198,12 @@ predicate allowParameterReturnInSelf(ParameterNode p) {
)
}

bindingset[s]
private string getFirstChar(string s) {
result =
min(int i, string c |
c = s.charAt(i) and c != "_"
or
c = "" and i = s.length()
|
c order by i
)
}

private string getAttributeContentFirstChar(AttributeContent ac) {
result = getFirstChar(ac.getAttribute())
}

private string getDictionaryElementContentKeyFirstChar(DictionaryElementContent dec) {
result = getFirstChar(dec.getKey())
}

private newtype TContentApprox =
TListElementContentApprox() or
TSetElementContentApprox() or
TTupleElementContentApprox() or
TDictionaryElementContentApprox(string first) {
first = "" // for `TDictionaryElementAnyContent`
or
first = getDictionaryElementContentKeyFirstChar(_)
} or
TAttributeContentApprox(string first) { first = getAttributeContentFirstChar(_) } or
TCapturedVariableContentApprox()

/** An approximated `Content`. */
class ContentApprox extends TContentApprox {
/** Gets a textual representation of this element. */
string toString() { result = "" }
}
class ContentApprox = Unit;

/** Gets an approximated value for content `c`. */
ContentApprox getContentApprox(Content c) {
c = TListElementContent() and
result = TListElementContentApprox()
or
c = TSetElementContent() and
result = TSetElementContentApprox()
or
c = TTupleElementContent(_) and
result = TTupleElementContentApprox()
or
result = TDictionaryElementContentApprox(getDictionaryElementContentKeyFirstChar(c))
or
c = TDictionaryElementAnyContent() and
result = TDictionaryElementContentApprox("")
or
result = TAttributeContentApprox(getAttributeContentFirstChar(c))
or
c = TCapturedVariableContent(_) and
result = TCapturedVariableContentApprox()
}
pragma[inline]
ContentApprox getContentApprox(Content c) { any() }

/** Helper for `.getEnclosingCallable`. */
DataFlowCallable getCallableScope(Scope s) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -898,78 +898,19 @@ class CapturedVariableContent extends Content, TCapturedVariableContent {
override string getMaDRepresentation() { none() }
}

/**
* An entity that represents a set of `Content`s.
*
* Most `ContentSet`s are singletons (i.e. they consist of a single `Content`),
* but `AnyDictionaryElement` and `AnyTupleElement` act as wildcards on the
* read side: a read at such a `ContentSet` matches any specific dictionary
* key / tuple index store, as well as (for dictionaries) the
* "unknown-bucket" Content `DictionaryElementAnyContent`.
*
* Keeping these as wildcard `ContentSet`s (rather than enumerating one
* `ContentSet` per key/index) keeps the dataflow `readSetEx` relation small
* when implicit reads are used (e.g. at sinks via `defaultImplicitTaintRead`).
*/
private newtype TContentSet =
TSingletonContent(Content c) or
TAnyTupleElement() or
TAnyDictionaryElement() or
TAnyTupleOrDictionaryElement()

/**
* An entity that represents a set of `Content`s.
*
* The set may be interpreted differently depending on whether it is
* stored into (`getAStoreContent`) or read from (`getAReadContent`).
*/
class ContentSet extends TContentSet {
/** Holds if this content set is the singleton `{c}`. */
predicate isSingleton(Content c) { this = TSingletonContent(c) }

/** Holds if this content set is the wildcard for all tuple elements. */
predicate isAnyTupleElement() { this = TAnyTupleElement() }

/** Holds if this content set is the wildcard for all dictionary elements. */
predicate isAnyDictionaryElement() { this = TAnyDictionaryElement() }

/** Holds if this content set is the wildcard for all tuple elements or dictionary elements. */
predicate isAnyTupleOrDictionaryElement() { this = TAnyTupleOrDictionaryElement() }

class ContentSet instanceof Content {
/** Gets a content that may be stored into when storing into this set. */
Content getAStoreContent() { this = TSingletonContent(result) }
Content getAStoreContent() { result = this }

/** Gets a content that may be read from when reading from this set. */
Content getAReadContent() {
this = TSingletonContent(result)
or
// Wildcard expansion: a read at "any tuple element" matches a store at any
// specific tuple index. (Stores always target a specific index, so we don't
// need a `TupleElementAnyContent` Content kind here.)
this = TAnyTupleElement() and result instanceof TupleElementContent
or
this = TAnyDictionaryElement() and
(result instanceof DictionaryElementContent or result instanceof DictionaryElementAnyContent)
or
this = TAnyTupleOrDictionaryElement() and
(
result instanceof TupleElementContent or
result instanceof DictionaryElementContent or
result instanceof DictionaryElementAnyContent
)
}
Content getAReadContent() { result = this }

/** Gets a textual representation of this content set. */
string toString() {
exists(Content c | this = TSingletonContent(c) | result = c.toString())
or
this = TAnyTupleElement() and result = "Any tuple element"
or
this = TAnyDictionaryElement() and result = "Any dictionary element"
or
this = TAnyTupleOrDictionaryElement() and result = "Any tuple or dictionary element"
}
string toString() { result = super.toString() }
}

/** Gets the singleton `ContentSet` wrapping the `Content` `c`. */
ContentSet singleton(Content c) { result = TSingletonContent(c) }
Loading
Loading